library(Biostrings)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.1
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
library(ggplot2)
library(gggenes)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
## 
##     collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following object is masked from 'package:XVector':
## 
##     slice
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine

Sed trick to shorten the names downloaded from ncbi

Sequences downloaded from ncbi have long names that I reformat this way:

First use this :

sed -E -i '' 's/(^>[A-Z]*_[0-9][0-9]*)\.[0-9]_.*\[(.*)]/\1_\2/g' contig_*.aln

This will change names such as >YP_009337856.1_RNA-dependent_RNA_polymerase_[Wenling_crustacean_virus_14] into
>YP_009337856_Wenling_crustacean_virus_14 (the ".1" version suffix is dropped by the pattern).

Then some sequences have different structure names. This second sed command will reformat them also :

sed -E  -i '' 's/(^>[A-Z][A-Z]*[0-9][0-9]*)\.[0-9]_.*\[(.*)]/\1_\2/g' contig_*.aln 

this will change ids such as >AWA82254.1_hypothetical_protein,partial[Mogami_virus] into >AWA82254_Mogami_virus

Finally, remove any special characters that are not allowed in sequence names in Seaview.

sed -i '' 's/(//g' contig*.aln
sed -i '' 's/)//g' contig*.aln
sed -i '' 's/,//g' contig*.aln
sed -i '' 's/;//g' contig*.aln
sed -i '' 's/://g' contig*.aln

Sequences homologous to ours were obtained with a blastp-like approach (mmseqs2), using an e-value threshold of $10^{-10}$. For some of our sequences, this approach retrieved too many sequences for ML phylogenetic reconstruction. In those cases, we first built an NJ phylogeny to select a subset of sequences, which was then used in the ML phylogenies.

New DNA viruses

prepare data

import all WGA contigs and their gff

# Load every WGA contig (the file name indicates unassigned contigs are included)
# as a named BStringSet, then preview the first entries.
contigs_wga <- readBStringSet(
  filepath = "../sequences/wga_final_contigs_with_unassigned.fa"
)
head(contigs_wga)
## BStringSet object of length 6:
##     width seq                                               names               
## [1]  1613 AGGAGAAGGAGAGACTAAAAAGC...TCGAAGCGTAAGGGAAAAGGAGC contig_10471
## [2]  1503 TCTGGGCTGGTCTTATGGGGGGA...TTCATGATGTCCGTCAATTCTGT contig_11634
## [3]  1484 TTTTTTTTTCAAGCAGAAGACGG...ATGGCCCCCCACCCCCAAACACA contig_11850
## [4]  1450 TTGTTGAAATTTATTAAATAAAT...GTTTATAAATAATTTTTTTTTGC contig_12283
## [5] 10490 TTATAAATTTTAAATGAAAAGAA...TTTTTGGGTCACAATGGTTTTTG contig_1269
## [6]  7285 TCATTTAAAAAATTTACAAATTT...GAAAAAAAATATTATAAAAAAAA contig_1350
# Import the ORF predictions (GFF tables) of the assigned contigs and of the
# unassigned contigs (two getorf prediction options), then stack them.
gff_columns <- c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes")

gff_wga <- read.table("../sequences/wga_final_contigs.gff")
# add unassigned contigs
gff_wga_unassigned0 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option0.gff")
gff_wga_unassigned1 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option1.gff")
names(gff_wga) <- gff_columns
names(gff_wga_unassigned0) <- gff_columns
names(gff_wga_unassigned1) <- gff_columns

# we will use the phase column to indicate which getorf option has been used : 1=>1; 2=>0
gff_wga$phase <- 1
gff_wga_unassigned1$phase <- 1
gff_wga_unassigned0$phase <- 2

gff_wga <- rbind(gff_wga, gff_wga_unassigned0, gff_wga_unassigned1)

# Encode the strand explicitly: "+" -> "TRUE" (forward), "-" -> "FALSE".
# Relabelling the levels of as.factor(strand) by position depends on how the
# current locale sorts "+" and "-", which can silently invert the flag.
# Any other strand value becomes NA instead of being mislabelled.
strand_forward <- gff_wga$strand == "+"
strand_forward[!(gff_wga$strand %in% c("+", "-"))] <- NA
gff_wga$strand <- factor(strand_forward, levels = c(FALSE, TRUE))
gff_wga$phase <- as.factor(gff_wga$phase)

head(gff_wga)

Add contig length

# Attach the length of each contig to its ORF rows as column `seq_length`.
# The lengths are put in an explicitly named two-column table so the merge no
# longer relies on row names or on the position of the new column.
contig_length <- width(contigs_wga)
names(contig_length) <- names(contigs_wga)
contig_length_table <- data.frame(
  seqid = names(contig_length),
  seq_length = contig_length,
  row.names = NULL
)
gff_wga <- merge(gff_wga, contig_length_table, by = "seqid")

Add an orf id column

# Build the ORF identifier "<seqid>_<start>_<end>_<strand symbol>".
# The strand flag ("TRUE"/"FALSE") is translated to "+"/"-" before pasting, so
# the substitution can never touch text that is part of the contig name.
strand_symbol <- ifelse(gff_wga$strand == "TRUE", "+", "-")
orf_names <- paste(gff_wga$seqid, gff_wga$start, gff_wga$end, strand_symbol, sep = "_")
gff_wga$orf_name <- orf_names

import blastp results

# Read the tabular blastp output and label its twelve columns.
# NOTE: the variable `names` lives alongside base::names(); calls such as
# names(x) still resolve to the function.
wga_blast <- read.table(file = "../TABLES/wga_final_contigs_getorf.blastp.tab")
names <- c(
  "query_id", "subject_id", "identity", "alignment_length",
  "mismatches", "gap_opens", "qstart", "qend",
  "sstart", "send", "evalue", "bitscore"
)
names(wga_blast) <- names
head(wga_blast)

import subject_id sequences (with informative names)

subject_id_seqs=readBStringSet("../sequences/wga_protein_homologs.fasta")
subject_id_seqs
## BStringSet object of length 1230:
##        width seq                                            names               
##    [1]   604 MNIVENSIFLSNLMKSANTFEL...SLRKCPICRSTIKGTVRTFLS NP_001156.1 bacul...
##    [2]   618 MHKTASQRLFPGPSYQNIKSIM...SLRKCPICRGIIKGTVRTFLS NP_001157.1 bacul...
##    [3]   148 MKALIVLGLVLLSVTVQGKVFE...VAWRNRCQNRDVRQYVQGCGV NP_000230.1 lysoz...
##    [4]   437 MTTSTLQKAIDLVTKATEEDKA...TVNADDLLKVKKFSEDFGQES NP_037377.1 vacuo...
##    [5]   618 MTDRGTNNDDWYIVDEAECRDD...ESDGKPQQPLRLATRAASNSI NP_040898.1 hypot...
##    ...   ... ...
## [1226]   140 MKAFFALVLLAIAASAMAGRTL...WSAWAVWHYCSGWLPSIDECF XP_016022646.2 ly...
## [1227]   140 MKAFIVLVALACAAPAFARTMD...WSAWSTWHYCSGWLPSIDDCF XP_039149016.1 ly...
## [1228]   140 MKAFIVLVALACAAPAFARTMD...WSAWSTWHYCSGWLPSIDDCF XP_016029777.2 ly...
## [1229]  1033 MFPPRLLRIAFVICLLIVLLSP...ENVCDWPENVEGCHTPTEAPA XP_039149439.1 pr...
## [1230]   159 MKAWGTVVVTLATLMVVTVDAK...DLSEWLKGCDMHVKIDPKIHP NP_001381227.1 sp...
# Split every FASTA header on spaces once: the first token is the accession
# (short name), the remaining tokens joined back together are the annotation.
header_tokens <- strsplit(names(subject_id_seqs), " ")
short_names <- vapply(header_tokens, function(tokens) tokens[1], character(1))
annotation <- vapply(
  header_tokens,
  function(tokens) paste0(tokens[-1], collapse = " "),
  character(1)
)
df <- data.frame(short_names, annotation)

Add this information to blast output

# Left join: every blast hit is kept and gains the subject annotation when the
# subject accession is present in `df`.
wga_blast2 <- merge(
  x = wga_blast, y = df,
  by.x = "subject_id", by.y = "short_names",
  all.x = TRUE, all.y = FALSE
)

Combine this to the gff

# Keep, for each query ORF, the single hit with the smallest e-value.
# The grouping is dropped afterwards so that later verbs applied to the
# best-hit table are not silently evaluated per query.
wga_blast2_besthit <- wga_blast2 %>%
  group_by(query_id) %>%
  arrange(evalue) %>%
  dplyr::slice(1) %>%
  ungroup()
# Left join on the ORF identifier: ORFs without any blast hit are kept (NA columns).
gff_wga2 <- merge(gff_wga, wga_blast2_besthit, by.x = "orf_name", by.y = "query_id", all.x = TRUE, all.y = FALSE)

write to disk:

# Export the annotated ORF table as a tab-separated file with a header line,
# without row names or quoting, then preview it.
write.table(
  x = gff_wga2,
  file = "../figures/orf_predictions/gff_wga2.txt",
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)
head(gff_wga2)

import taxonomic information for all sequences involved in the phylogenies

# Semicolon-separated table with a header line; the argument is spelled out
# (`header`) instead of relying on partial matching of `h`.
wga_taxo_info <- read.table("../TABLES/wga_protein_homologs.ids_taxid.txt", sep = ";", header = TRUE)
dim(wga_taxo_info)
## [1] 1230   28
head(wga_taxo_info)

Define family colours

library(RColorBrewer)
# One colour per family: the factor levels of `family` are relabelled with
# colours taken from two 12-colour Brewer palettes (24 base colours).
family_factor <- as.factor(wga_taxo_info$family)
family_palette <- c(brewer.pal(12, name = "Set3"), brewer.pal(12, name = "Paired"))
if (nlevels(family_factor) > length(family_palette)) {
  # More families than base colours: `levels<-` would fail, so interpolate the
  # base palette up to the required number of colours.
  family_palette <- grDevices::colorRampPalette(family_palette)(nlevels(family_factor))
}
levels(family_factor) <- family_palette
wga_taxo_info$family_colour <- family_factor

Function to plot phylogenies

library(ggplot2)
library(ape)
## 
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
## 
##     where
## The following object is masked from 'package:Biostrings':
## 
##     complement
library(ggtree)
## ggtree v3.8.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
## 
## Guangchuang Yu.  Data Integration, Manipulation and Visualization of
## Phylogenetic Trees (1st edition). Chapman and Hall/CRC. 2022,
## doi:10.1201/9781003279242
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
## The following object is masked from 'package:Biostrings':
## 
##     collapse
## The following object is masked from 'package:IRanges':
## 
##     collapse
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)

#' Plot a phylogenetic tree with tip labels coloured by virus family
#'
#' Side effects: the tree file is rewritten in place when it contains square
#' brackets ("[" becomes "-", "]" is removed), and the colour table
#' "../TABLES/colour_code_virus_families.txt" is read.
#'
#' @param file Path to a Newick tree file readable by `ape::read.tree()`.
#' @param taxo_info Data frame with at least the columns `target` (accession)
#'   and `family`.
#' @return A ggtree plot object.
plot_phylogeny <- function(file = "../phylogenies/contig_2320_1853_2266_+_with_homologs-PhyML_tree3", taxo_info = wga_taxo_info) {
  # Replace brackets in the tree file. This is done in R rather than through a
  # shell call to a hard-coded gsed binary: no quoting issues with the path and
  # no dependency on a Homebrew installation.
  file <- as.character(file)
  newick <- readLines(file, warn = FALSE)
  cleaned <- gsub("]", "", gsub("[", "-", newick, fixed = TRUE), fixed = TRUE)
  if (!identical(cleaned, newick)) {
    writeLines(cleaned, file)
  }
  tree <- read.tree(file, comment.char = "")

  # Remove the ".<version>" suffix of accession numbers so that they match the
  # accession numbers used in the alignment files. "+" also handles versions
  # with more than one digit (e.g. ".10").
  taxo_info$target <- gsub(pattern = "\\.[0-9]+", replacement = "", x = taxo_info$target)

  # Protein id of a tip = text before the first "_<letter>" of its label,
  # without the version suffix.
  prot_ids <- vapply(
    strsplit(tree$tip.label, "_[a-zA-Z]"),
    function(parts) paste0(parts[1]),
    character(1)
  )
  prot_ids <- gsub(pattern = "\\.[0-9]+", replacement = "", x = prot_ids)

  # Add metadata: one row per tip, joined to the taxonomy table.
  d <- data.frame(tree$tip.label, prot_ids)
  dd <- merge(d, taxo_info, by.x = "prot_ids", by.y = "target", all.x = TRUE, all.y = FALSE)
  dd$family <- as.character(dd$family) # guard against factor input before assigning new labels
  dd[grep(pattern = "contig", x = dd$prot_ids), "family"] <- "This paper"
  dd$family[is.na(dd$family)] <- "Unassigned" # replace NA by unassigned

  # IMPORTANT: reorder columns. The only requirement of the input data of
  # `%<+%` is that its first column matches the node/tip labels of the tree.
  # https://bioconductor.riken.jp/packages/3.4/bioc/vignettes/ggtree/inst/doc/treeAnnotation.html
  # NOTE(review): column 3 (the first taxo_info column after the merge key) is
  # left out here, as in the original code - confirm this is intended.
  dd <- dd[, c(2, 1, 4:dim(dd)[2])]

  # Colour code: family -> colour table, restricted to the families present.
  ddd <- read.table("../TABLES/colour_code_virus_families.txt", header = TRUE, sep = "\t", comment.char = "")
  ddd <- as_tibble(ddd)
  names(ddd) <- c("family", "family_colour")
  ddd <- ddd[order(ddd$family), ]
  ddd <- ddd[ddd$family %in% unique(dd$family), ]
  # Named colours are matched to families by name, not by position, so a family
  # missing from the colour table cannot shift the colours of the others.
  family_colours <- setNames(as.vector(ddd$family_colour), ddd$family)
  missing_families <- setdiff(unique(dd$family), ddd$family)
  if (length(missing_families) > 0) {
    warning(
      "No colour defined for: ", paste(missing_families, collapse = ", "),
      call. = FALSE
    )
  }

  p <- ggtree(tree) %<+% dd +
    theme_tree() +
    # Print only numeric labels (node supports) above 0.70; labels that are not
    # numbers give NA and are skipped, hence the silenced coercion warning.
    geom_text2(aes(label = label, subset = suppressWarnings(as.numeric(label)) > 0.7), size = 2) +
    geom_treescale(x = 0, y = -2, linesize = 0.25, fontsize = 5) + # adds the scale
    xlim(0, 7) +
    theme(
      legend.position = "right",
      legend.key.height = unit(1, "cm"), # legend key height
      legend.key.width = unit(1, "cm"), # legend key width
      legend.title = element_blank(), # no legend title
      legend.text = element_text(size = 12), # legend text font size
      aspect.ratio = 1
    )

  p <- p +
    geom_tiplab(aes(color = family), size = 3) +
    scale_color_manual(values = family_colours)

  # Enlarge the legend symbols.
  p <- p + guides(color = guide_legend(override.aes = list(size = 6)))

  p
}

Create a list to put tentative virus names, their corresponding contigs (either with or without hits). Contigs without hits co-occur with the other ones (dark matter…)

virus_list=list()

Function for plotting orf predictions

#' Plot the predicted ORFs of a set of contigs and save the figure as PDF
#'
#' Prints the selected rows of `gff`, draws one facet per contig and writes the
#' plot to "../figures/orf_predictions/<name>.pdf".
#'
#' @param contig_set Character vector of contig ids (values of `gff$seqid`).
#' @param contig_set_unassigned Character vector of additional contig ids, or
#'   `NA` when there is none.
#' @param gff Data frame of ORF predictions; the plot reads the columns
#'   `seqid`, `start`, `end`, `strand`, `phase`, `seq_length` and `annotation`.
#' @param name Base name (without extension) of the PDF file.
#' @return An unnamed list: the ggplot object, then the selected rows of `gff`.
plot_orfs <- function(contig_set, contig_set_unassigned = NA, gff = gff_wga, name = "name") {
  # The former defaults referred to themselves (`contig_set = contig_set`),
  # which fails as soon as an argument is omitted.
  wanted <- c(contig_set, contig_set_unassigned)
  wanted <- wanted[!is.na(wanted)] # NA only means "no unassigned contig"

  tab <- gff[gff$seqid %in% wanted, ]
  print(tab)

  p <- ggplot(tab, aes(xmin = start, xmax = end, y = seqid, forward = strand, label = annotation)) +
    geom_gene_arrow(aes(lty = phase)) +
    facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
    theme_genes() +
    # White segments drawn from each contig end outwards, presumably to hide
    # the baseline beyond the contig boundaries. `linewidth` replaces `size`,
    # which is deprecated for lines since ggplot2 3.4.0.
    geom_segment(aes(y = seqid, yend = seqid, x = seq_length), xend = 100000, colour = "white", linewidth = 2) +
    geom_segment(aes(y = seqid, yend = seqid, x = 0), xend = -100000, colour = "white", linewidth = 2) +
    xlab("") +
    ylab("")

  ggsave(
    filename = paste0("../figures/orf_predictions/", name, ".pdf"),
    plot = p, device = "pdf"
  )

  list(p, tab)
}

Diaphorina_citri_densovirus in Pachycrepoideus sp.

Define the corresponding contigs :

# Single contig for this virus; NA means that no unassigned contig goes with it.
contig_set <- "contig_2320"
contig_set_unassigned <- NA
# Keep the contig grouping under a tentative virus name for the later fusion of
# the corresponding lines.
virus_list[["Parvoviridae_Pachy"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Parvoviridae_Pachy"
)
##                    orf_name       seqid    source type start  end score strand
## 780  contig_2320_144_1853_+ contig_2320 getorf_JV gene   144 1853     .   TRUE
## 781 contig_2320_1853_2266_+ contig_2320 getorf_JV gene  1853 2266     .   TRUE
## 782 contig_2320_2273_2926_- contig_2320 getorf_JV gene  2273 2926     .  FALSE
## 783 contig_2320_2607_3260_- contig_2320 getorf_JV gene  2607 3260     .  FALSE
## 784 contig_2320_3303_4526_- contig_2320 getorf_JV gene  3303 4526     .  FALSE
## 785 contig_2320_4530_4817_- contig_2320 getorf_JV gene  4530 4817     .  FALSE
## 786 contig_2320_4874_5044_+ contig_2320 getorf_JV gene  4874 5044     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 780     1       1710       5045 YP_009552708.1    0.262              444
## 781     1        414       5045 YP_009256211.1    0.529              119
## 782     1        654       5045           <NA>       NA               NA
## 783     1        654       5045           <NA>       NA               NA
## 784     1       1224       5045 YP_009256212.1    0.313              201
## 785     1        288       5045    NP_051016.1    0.473               91
## 786     1        171       5045           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 780        262         0    120  563     87  442 1.890e-23      111
## 781         56         0      2  120    557  675 6.803e-38      142
## 782         NA        NA     NA   NA     NA   NA        NA       NA
## 783         NA        NA     NA   NA     NA   NA        NA       NA
## 784        135         0    211  407    185  385 4.945e-25      114
## 785         45         0      5   95    152  237 1.055e-13       70
## 786         NA        NA     NA   NA     NA   NA        NA       NA
##                                                           annotation
## 780                       putative NS1 [Bombus cryptarum densovirus]
## 781 putative nonstructural protein NS1 [Diaphorina citri densovirus]
## 782                                                             <NA>
## 783                                                             <NA>
## 784        putative structural protein [Diaphorina citri densovirus]
## 785           structural protein [Periplaneta fuliginosa densovirus]
## 786                                                             <NA>
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name       seqid    source type start  end score strand
## 780  contig_2320_144_1853_+ contig_2320 getorf_JV gene   144 1853     .   TRUE
## 781 contig_2320_1853_2266_+ contig_2320 getorf_JV gene  1853 2266     .   TRUE
## 782 contig_2320_2273_2926_- contig_2320 getorf_JV gene  2273 2926     .  FALSE
## 783 contig_2320_2607_3260_- contig_2320 getorf_JV gene  2607 3260     .  FALSE
## 784 contig_2320_3303_4526_- contig_2320 getorf_JV gene  3303 4526     .  FALSE
## 785 contig_2320_4530_4817_- contig_2320 getorf_JV gene  4530 4817     .  FALSE
## 786 contig_2320_4874_5044_+ contig_2320 getorf_JV gene  4874 5044     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 780     1       1710       5045 YP_009552708.1    0.262              444
## 781     1        414       5045 YP_009256211.1    0.529              119
## 782     1        654       5045           <NA>       NA               NA
## 783     1        654       5045           <NA>       NA               NA
## 784     1       1224       5045 YP_009256212.1    0.313              201
## 785     1        288       5045    NP_051016.1    0.473               91
## 786     1        171       5045           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 780        262         0    120  563     87  442 1.890e-23      111
## 781         56         0      2  120    557  675 6.803e-38      142
## 782         NA        NA     NA   NA     NA   NA        NA       NA
## 783         NA        NA     NA   NA     NA   NA        NA       NA
## 784        135         0    211  407    185  385 4.945e-25      114
## 785         45         0      5   95    152  237 1.055e-13       70
## 786         NA        NA     NA   NA     NA   NA        NA       NA
##                                                           annotation
## 780                       putative NS1 [Bombus cryptarum densovirus]
## 781 putative nonstructural protein NS1 [Diaphorina citri densovirus]
## 782                                                             <NA>
## 783                                                             <NA>
## 784        putative structural protein [Diaphorina citri densovirus]
## 785           structural protein [Periplaneta fuliginosa densovirus]
## 786                                                             <NA>

The third and fourth ORFs overlap extensively. Both were kept by our ORF predictor script because they have exactly the same length (654 nt). Neither of them has homologs in the database.

# ORF table returned by plot_orfs() (second element of its result).
tab <- res[[2]]
# Manual short labels, one per ORF row (seven rows are expected here).
tab$annotation2 <- c("NS prot", "NS prot", "", "", "structural prot", "struct prot", "")

p <- ggplot(tab, aes(xmin = start, xmax = end, y = seqid, forward = strand)) +
  geom_gene_arrow(aes(lty = phase)) +
  facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
  theme_genes() +
  # White segments from each contig end outwards, presumably to hide the
  # baseline beyond the contig. `linewidth` replaces `size`, which is
  # deprecated for lines since ggplot2 3.4.0.
  geom_segment(aes(y = seqid, yend = seqid, x = seq_length), xend = 100000, colour = "white", linewidth = 2) +
  geom_segment(aes(y = seqid, yend = seqid, x = 0), xend = -100000, colour = "white", linewidth = 2) +
  xlab("") +
  ylab("")

ggsave(
  filename = "../figures/orf_predictions/densovirus.pdf",
  plot = p
)
## Saving 7 x 5 in image
p

We found the typical inverted terminal repeat (ITR) at the 3’end (but not at the 5’ end).

 Score = 198 bits (107),  Expect = 2e-51
 Identities = 148/165 (90%), Gaps = 14/165 (8%)
 Strand=Plus/Minus

Query  4882  TCGAGTGAAACTGACGAACCTCAAAGCCCCTCCTCGATGTATACCCCCAACACACAAAAC  4941
             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct  5039  TCGAGTGAAACTGACGAACCTCAAAGCCCCTCCTCGATGTATACCCCCAACACACAAAAC  4980

Query  4942  CATGGCCTATATAATCATGACAAAGTC--GA-T-TAT-GGCC--GGTTTTGTGTGTTGGG  4994
             |  ||||   ||||||  |||   |||  || | ||| ||||  ||||||||||||||||
Sbjct  4979  C--GGCC---ATAATC--GACTTTGTCATGATTATATAGGCCATGGTTTTGTGTGTTGGG  4927

Query  4995  GGTATACATCGAGGAGGGGCTTTGAGGTTCGTCAGTTTCACTCGA  5039
             |||||||||||||||||||||||||||||||||||||||||||||
Sbjct  4926  GGTATACATCGAGGAGGGGCTTTGAGGTTCGTCAGTTTCACTCGA  4882

We built a phylogenetic tree based on NS1 protein (ORF 1853-2266).

p = plot_phylogeny("../phylogenies/contig_2320_1853_2266_+_with_homologs-PhyML_tree", taxo_info = wga_taxo_info)
p 
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

ref : see Nigg & Falk 2020 JGV

Drosophila subobscura Vesanto virus

12 segments are found, 2 of which have no homologs but share the same distribution. Note that 2 additional ones have similar distribution but were discarded because they have low coding density ( “contig_21183”, “contig_20579”).

Define the corresponding contigs :

# Contigs with homologs, and co-occurring contigs without homologs.
contig_set <- c("contig_2799", "contig_14992", "contig_2780", "contig_2857", "contig_22871", "contig_2659", "contig_8503", "contig_15585")
contig_set_unassigned <- c("contig_7654", "contig_17519")

# Export both sets of contig sequences as FASTA files.
writeXStringSet(contigs_wga[contig_set], "../sequences/Vesantovirus_D.sub.fa")
writeXStringSet(contigs_wga[contig_set_unassigned], "../sequences/Vesantovirus_D.sub_unassigned.fa")

# store for later fusion of corresponding lines
virus_list$'Vesantovirus_D.sub' <- list(contig_set = contig_set, contig_set_unassigned = contig_set_unassigned)
tab <- gff_wga2[gff_wga2$seqid %in% c(contig_set, contig_set_unassigned), ]
# contig_22871_59_3268_+ and contig_2659_3157_3486_- are not annotated
# correctly by their best hit: override their annotation by hand.
tab$annotation[tab$orf_name == "contig_22871_59_3268_+"] <- "DNA pol [Vesanto virus]"
tab$annotation[tab$orf_name == "contig_2659_3157_3486_-"] <- ""
tab
p <- ggplot(tab, aes(xmin = start, xmax = end, y = seqid, forward = strand)) +
  geom_gene_arrow(aes(lty = phase)) +
  facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
  theme_genes() +
  # White segments from each contig end outwards, presumably to hide the
  # baseline beyond the contig. `linewidth` replaces `size`, which is
  # deprecated for lines since ggplot2 3.4.0.
  geom_segment(aes(y = seqid, yend = seqid, x = seq_length), xend = 100000, colour = "white", linewidth = 2) +
  geom_segment(aes(y = seqid, yend = seqid, x = 0), xend = -100000, colour = "white", linewidth = 2) +
  xlab("") +
  ylab("")

ggsave(
  filename = "../figures/orf_predictions/Vesantovirus_Dsub.pdf",
  plot = p
)
## Saving 7 x 5 in image
p

Looking at the assembly graph revealed the presence of inverted terminal repeats for the different segments, as is expected for segmented viruses. However, based on our data (composed only of short reads), it is unclear how to reconstitute the complete segment sequences. The contigs we deposited in the databases should thus be seen as incomplete fragments of the different segments, and they require further analysis to obtain the full sequence organization.

graph seen using Bandage software. Colours represent sequence homology with the 8 contigs.

Phylogeny

We built a phylogenetic tree based on ORF 940-2433 corresponding to NS1 protein.

p = plot_phylogeny(file = "../phylogenies/contig_2659_940_2433_+_with_homologs-PhyML_tree")
p= p + xlim(0,16)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_2659_940_2433_+_with_homologs-PhyML_tree2.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Looking at unassigned contigs revealed that 4 contigs co-occurred with the other 8 contigs identified previously by protein sequence homology (blastx). Three out of the four unassigned contigs (contigs 7654, 21183, 20579, 17519) have clear DNA sequence homology with segment 12 of a Vesantovirus found in D. melanogaster (MT496878.1). They may correspond to three fragments of a single segment that we were unable to assemble. We were unable to identify any sequence homology (neither at the protein level nor at the nucleotide level) for contig 7654 (1991 bp). It is, however, very likely that this sequence does belong to the Vesantovirus genome.

Ambidensoviruses (Parvoviridae2) in Lh, Tricho, D. kuntzei and others.

this homology was detected in the second round of blastx (mmseqs2) on whole nr

# Single contig for this virus; NA means that no unassigned contig goes with it.
contig_set <- c("contig_15192")
contig_set_unassigned <- NA
# Keep the contig grouping under a tentative virus name for the later fusion of
# the corresponding lines.
virus_list[["Parvoviridae2"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Parvoviridae2"
)
##                  orf_name        seqid    source type start  end score strand
## 254 contig_15192_2_1099_- contig_15192 getorf_JV gene     2 1099     .  FALSE
## 255 contig_15192_2_1219_- contig_15192 getorf_JV gene     2 1219     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 254     1       1098       1263       <NA>       NA               NA         NA
## 255     2       1218       1263       <NA>       NA               NA         NA
##     gap_opens qstart qend sstart send evalue bitscore annotation
## 254        NA     NA   NA     NA   NA     NA       NA       <NA>
## 255        NA     NA   NA     NA   NA     NA       NA       <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                  orf_name        seqid    source type start  end score strand
## 254 contig_15192_2_1099_- contig_15192 getorf_JV gene     2 1099     .  FALSE
## 255 contig_15192_2_1219_- contig_15192 getorf_JV gene     2 1219     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 254     1       1098       1263       <NA>       NA               NA         NA
## 255     2       1218       1263       <NA>       NA               NA         NA
##     gap_opens qstart qend sstart send evalue bitscore annotation
## 254        NA     NA   NA     NA   NA     NA       NA       <NA>
## 255        NA     NA   NA     NA   NA     NA       NA       <NA>

Phylogeny

We built a phylogenetic tree based on ORF 940-2433 corresponding to NS1 protein.

p = plot_phylogeny(file = "../phylogenies/contig_15192_homolog_nr-PhyML_tree")
p + xlim(0,3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

#ggsave(filename = "../phylogenies/contig_15192_homolog_nr-PhyML_tree.pdf", plot = p)

Previously described DNA viruses

Linvill Road virus

This virus was previously described in D. simulans, which is also where we found it.

# Contigs attributed to Linvill Road virus; no unassigned contigs are listed
# (NA -- presumably how plot_orfs() is told there are none; confirm).
contig_set_unassigned <- NA
contig_set <- c("contig_627", "contig_626")

# Write the selected contig sequences to a FASTA file.
# NOTE(review): the file name spells "Linvil" with a single "l", unlike the
# "Linvill_road_virus_D.sim" key used below -- confirm before renaming, since
# other code may read this path.
writeXStringSet(
  contigs_wga[contig_set],
  filepath = "../sequences/Linvil_road_virus_D.sim.fa"
)

# Keep the contig sets for this virus so the corresponding lines can be
# merged later.
virus_list$Linvill_road_virus_D.sim <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wga2 annotations.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Linvill_road_virus_D.sim"
)
##                    orf_name      seqid    source type start  end score strand
## 1041   contig_626_120_659_+ contig_626 getorf_JV gene   120  659     .   TRUE
## 1042 contig_626_1297_2685_- contig_626 getorf_JV gene  1297 2685     .  FALSE
## 1043 contig_626_2723_3127_- contig_626 getorf_JV gene  2723 3127     .  FALSE
## 1044 contig_626_3206_3856_- contig_626 getorf_JV gene  3206 3856     .  FALSE
## 1045   contig_626_649_999_- contig_626 getorf_JV gene   649  999     .  FALSE
## 1046    contig_627_2_1036_- contig_627 getorf_JV gene     2 1036     .  FALSE
##      phase attributes seq_length  subject_id identity alignment_length
## 1041     1        540       3917        <NA>       NA               NA
## 1042     1       1389       3917  AQN78651.1    0.991              463
## 1043     1        405       3917 NP_051016.1    0.445               92
## 1044     1        651       3917  AQN78650.1    1.000              217
## 1045     1        351       3917        <NA>       NA               NA
## 1046     1       1035       1154  AQN78650.1    0.997              345
##      mismatches gap_opens qstart qend sstart send     evalue bitscore
## 1041         NA        NA     NA   NA     NA   NA         NA       NA
## 1042          4         0      1  463      1  463 6.637e-314      957
## 1043         50         0      6   97    152  242  4.364e-18       85
## 1044          0         0      1  217    358  574 1.606e-148      465
## 1045         NA        NA     NA   NA     NA   NA         NA       NA
## 1046          1         0      1  345      1  345 2.570e-228      703
##                                                  annotation
## 1041                                                   <NA>
## 1042                putative protein 2 [Linvill Road virus]
## 1043 structural protein [Periplaneta fuliginosa densovirus]
## 1044                putative protein 1 [Linvill Road virus]
## 1045                                                   <NA>
## 1046                putative protein 1 [Linvill Road virus]
## Saving 7 x 5 in image
# First element of the plot_orfs() result; `[` keeps it wrapped in a list.
# NOTE(review): presumably the ORF plot -- confirm against plot_orfs().
res[1]
## [[1]]

# Second element of the plot_orfs() result: the ORF / BLAST annotation table
# for the selected contigs (still wrapped in a one-element list by `[`).
res[2]
## [[1]]
##                    orf_name      seqid    source type start  end score strand
## 1041   contig_626_120_659_+ contig_626 getorf_JV gene   120  659     .   TRUE
## 1042 contig_626_1297_2685_- contig_626 getorf_JV gene  1297 2685     .  FALSE
## 1043 contig_626_2723_3127_- contig_626 getorf_JV gene  2723 3127     .  FALSE
## 1044 contig_626_3206_3856_- contig_626 getorf_JV gene  3206 3856     .  FALSE
## 1045   contig_626_649_999_- contig_626 getorf_JV gene   649  999     .  FALSE
## 1046    contig_627_2_1036_- contig_627 getorf_JV gene     2 1036     .  FALSE
##      phase attributes seq_length  subject_id identity alignment_length
## 1041     1        540       3917        <NA>       NA               NA
## 1042     1       1389       3917  AQN78651.1    0.991              463
## 1043     1        405       3917 NP_051016.1    0.445               92
## 1044     1        651       3917  AQN78650.1    1.000              217
## 1045     1        351       3917        <NA>       NA               NA
## 1046     1       1035       1154  AQN78650.1    0.997              345
##      mismatches gap_opens qstart qend sstart send     evalue bitscore
## 1041         NA        NA     NA   NA     NA   NA         NA       NA
## 1042          4         0      1  463      1  463 6.637e-314      957
## 1043         50         0      6   97    152  242  4.364e-18       85
## 1044          0         0      1  217    358  574 1.606e-148      465
## 1045         NA        NA     NA   NA     NA   NA         NA       NA
## 1046          1         0      1  345      1  345 2.570e-228      703
##                                                  annotation
## 1041                                                   <NA>
## 1042                putative protein 2 [Linvill Road virus]
## 1043 structural protein [Periplaneta fuliginosa densovirus]
## 1044                putative protein 1 [Linvill Road virus]
## 1045                                                   <NA>
## 1046                putative protein 1 [Linvill Road virus]

LbFV_L.b

# Contigs whose ORFs match Leptopilina boulardi filamentous virus (LbFV).
contig_set <- paste0(
  "contig_",
  c(1505, 22345, 1350, 22895, 22365, 22449, 22533, 19307, 12283, 22381)
)
# Set explicitly instead of silently inheriting the value left over from the
# previous section (which was NA), so this chunk gives the same result when
# run on its own or after the sections are reordered.
contig_set_unassigned <- NA

# Keep the contig sets for this virus so the corresponding lines can be
# merged later.
virus_list$LbFV_L.b <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wga2 annotations.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "LbFV_L.b"
)
##                       orf_name        seqid    source type start   end score
## 132       contig_12283_3_152_- contig_12283 getorf_JV gene     3   152     .
## 133     contig_12283_387_539_- contig_12283 getorf_JV gene   387   539     .
## 134     contig_12283_693_878_- contig_12283 getorf_JV gene   693   878     .
## 197    contig_1350_1158_1364_+  contig_1350 getorf_JV gene  1158  1364     .
## 198    contig_1350_1472_1645_-  contig_1350 getorf_JV gene  1472  1645     .
## 199    contig_1350_1824_2081_-  contig_1350 getorf_JV gene  1824  2081     .
## 200    contig_1350_2089_4023_+  contig_1350 getorf_JV gene  2089  4023     .
## 201        contig_1350_3_257_-  contig_1350 getorf_JV gene     3   257     .
## 202      contig_1350_315_680_-  contig_1350 getorf_JV gene   315   680     .
## 203    contig_1350_4200_5126_+  contig_1350 getorf_JV gene  4200  5126     .
## 204    contig_1350_5229_6740_-  contig_1350 getorf_JV gene  5229  6740     .
## 205      contig_1350_680_997_-  contig_1350 getorf_JV gene   680   997     .
## 206    contig_1350_6947_7102_-  contig_1350 getorf_JV gene  6947  7102     .
## 246    contig_1505_1234_3102_-  contig_1505 getorf_JV gene  1234  3102     .
## 247       contig_1505_2_1186_-  contig_1505 getorf_JV gene     2  1186     .
## 248    contig_1505_3572_5626_+  contig_1505 getorf_JV gene  3572  5626     .
## 249    contig_1505_5654_6397_-  contig_1505 getorf_JV gene  5654  6397     .
## 478     contig_19307_248_433_+ contig_19307 getorf_JV gene   248   433     .
## 479     contig_19307_587_739_+ contig_19307 getorf_JV gene   587   739     .
## 649   contig_22345_1017_2027_+ contig_22345 getorf_JV gene  1017  2027     .
## 650 contig_22345_12095_12271_- contig_22345 getorf_JV gene 12095 12271     .
## 651 contig_22345_12686_15022_+ contig_22345 getorf_JV gene 12686 15022     .
## 652 contig_22345_15388_15663_+ contig_22345 getorf_JV gene 15388 15663     .
## 653 contig_22345_15826_16143_- contig_22345 getorf_JV gene 15826 16143     .
## 654 contig_22345_16234_16485_- contig_22345 getorf_JV gene 16234 16485     .
## 655 contig_22345_16630_17409_- contig_22345 getorf_JV gene 16630 17409     .
## 656 contig_22345_17417_17965_- contig_22345 getorf_JV gene 17417 17965     .
## 657     contig_22345_179_625_- contig_22345 getorf_JV gene   179   625     .
## 658 contig_22345_18010_20094_- contig_22345 getorf_JV gene 18010 20094     .
## 659 contig_22345_20331_21344_+ contig_22345 getorf_JV gene 20331 21344     .
## 660   contig_22345_2055_2882_+ contig_22345 getorf_JV gene  2055  2882     .
## 661 contig_22345_21392_21547_- contig_22345 getorf_JV gene 21392 21547     .
## 662 contig_22345_21617_22618_- contig_22345 getorf_JV gene 21617 22618     .
## 663 contig_22345_22700_23647_- contig_22345 getorf_JV gene 22700 23647     .
## 664 contig_22345_23792_23962_+ contig_22345 getorf_JV gene 23792 23962     .
## 665   contig_22345_2958_4103_+ contig_22345 getorf_JV gene  2958  4103     .
## 666   contig_22345_4521_5198_- contig_22345 getorf_JV gene  4521  5198     .
## 667   contig_22345_5208_5906_- contig_22345 getorf_JV gene  5208  5906     .
## 668   contig_22345_5931_6245_- contig_22345 getorf_JV gene  5931  6245     .
## 669   contig_22345_6770_7300_- contig_22345 getorf_JV gene  6770  7300     .
## 670     contig_22345_694_918_- contig_22345 getorf_JV gene   694   918     .
## 671   contig_22345_7624_8520_- contig_22345 getorf_JV gene  7624  8520     .
## 672  contig_22345_9649_11379_+ contig_22345 getorf_JV gene  9649 11379     .
## 673 contig_22365_10325_10870_- contig_22365 getorf_JV gene 10325 10870     .
## 674 contig_22365_10956_11165_- contig_22365 getorf_JV gene 10956 11165     .
## 675 contig_22365_11308_16086_- contig_22365 getorf_JV gene 11308 16086     .
## 676   contig_22365_1219_1794_+ contig_22365 getorf_JV gene  1219  1794     .
## 677 contig_22365_16399_16557_- contig_22365 getorf_JV gene 16399 16557     .
## 678 contig_22365_16643_17776_- contig_22365 getorf_JV gene 16643 17776     .
## 679 contig_22365_18040_18654_+ contig_22365 getorf_JV gene 18040 18654     .
## 680   contig_22365_1842_2921_+ contig_22365 getorf_JV gene  1842  2921     .
## 681 contig_22365_18779_19024_- contig_22365 getorf_JV gene 18779 19024     .
## 682 contig_22365_19027_19554_- contig_22365 getorf_JV gene 19027 19554     .
## 683 contig_22365_19642_19848_- contig_22365 getorf_JV gene 19642 19848     .
## 684 contig_22365_19829_20473_- contig_22365 getorf_JV gene 19829 20473     .
## 685   contig_22365_3229_4116_+ contig_22365 getorf_JV gene  3229  4116     .
## 686    contig_22365_356_1024_- contig_22365 getorf_JV gene   356  1024     .
## 687   contig_22365_4145_4450_+ contig_22365 getorf_JV gene  4145  4450     .
## 688   contig_22365_4548_5465_- contig_22365 getorf_JV gene  4548  5465     .
## 689   contig_22365_5498_6112_- contig_22365 getorf_JV gene  5498  6112     .
## 690   contig_22365_6127_6996_- contig_22365 getorf_JV gene  6127  6996     .
## 691  contig_22365_7069_10212_+ contig_22365 getorf_JV gene  7069 10212     .
## 692 contig_22381_10447_10659_- contig_22381 getorf_JV gene 10447 10659     .
## 693 contig_22381_10655_11848_- contig_22381 getorf_JV gene 10655 11848     .
## 694 contig_22381_12147_13829_- contig_22381 getorf_JV gene 12147 13829     .
## 695 contig_22381_13930_14562_+ contig_22381 getorf_JV gene 13930 14562     .
## 696 contig_22381_14583_15050_- contig_22381 getorf_JV gene 14583 15050     .
## 697   contig_22381_1502_2212_+ contig_22381 getorf_JV gene  1502  2212     .
## 698 contig_22381_15095_15760_+ contig_22381 getorf_JV gene 15095 15760     .
## 699 contig_22381_15756_16229_+ contig_22381 getorf_JV gene 15756 16229     .
## 700 contig_22381_16303_17325_- contig_22381 getorf_JV gene 16303 17325     .
## 701   contig_22381_2291_2440_+ contig_22381 getorf_JV gene  2291  2440     .
## 702   contig_22381_2686_2895_- contig_22381 getorf_JV gene  2686  2895     .
## 703   contig_22381_2915_3178_+ contig_22381 getorf_JV gene  2915  3178     .
## 704   contig_22381_3197_3406_- contig_22381 getorf_JV gene  3197  3406     .
## 705    contig_22381_322_1329_- contig_22381 getorf_JV gene   322  1329     .
## 706   contig_22381_3527_4222_+ contig_22381 getorf_JV gene  3527  4222     .
## 707   contig_22381_4311_9146_+ contig_22381 getorf_JV gene  4311  9146     .
## 708  contig_22381_9132_10343_- contig_22381 getorf_JV gene  9132 10343     .
## 709 contig_22449_11022_11357_+ contig_22449 getorf_JV gene 11022 11357     .
## 710    contig_22449_113_1198_+ contig_22449 getorf_JV gene   113  1198     .
## 711 contig_22449_11417_11641_+ contig_22449 getorf_JV gene 11417 11641     .
## 712   contig_22449_1235_1882_+ contig_22449 getorf_JV gene  1235  1882     .
## 713   contig_22449_1927_5850_- contig_22449 getorf_JV gene  1927  5850     .
## 714   contig_22449_5834_5983_- contig_22449 getorf_JV gene  5834  5983     .
## 715   contig_22449_5967_6521_+ contig_22449 getorf_JV gene  5967  6521     .
## 716   contig_22449_6524_7117_- contig_22449 getorf_JV gene  6524  7117     .
## 717   contig_22449_7138_7470_+ contig_22449 getorf_JV gene  7138  7470     .
## 718   contig_22449_7535_7786_+ contig_22449 getorf_JV gene  7535  7786     .
## 719   contig_22449_7877_8995_+ contig_22449 getorf_JV gene  7877  8995     .
## 720  contig_22449_8985_10409_+ contig_22449 getorf_JV gene  8985 10409     .
## 732   contig_22533_3206_4132_+ contig_22533 getorf_JV gene  3206  4132     .
## 733     contig_22533_383_616_- contig_22533 getorf_JV gene   383   616     .
## 734   contig_22533_4132_5184_+ contig_22533 getorf_JV gene  4132  5184     .
## 735   contig_22533_5193_6800_- contig_22533 getorf_JV gene  5193  6800     .
## 736   contig_22533_7018_7335_+ contig_22533 getorf_JV gene  7018  7335     .
## 737   contig_22533_7389_8687_+ contig_22533 getorf_JV gene  7389  8687     .
## 738   contig_22533_9105_9551_- contig_22533 getorf_JV gene  9105  9551     .
## 739    contig_22533_944_2971_+ contig_22533 getorf_JV gene   944  2971     .
## 740   contig_22533_9620_9844_- contig_22533 getorf_JV gene  9620  9844     .
## 763   contig_22895_2774_3307_+ contig_22895 getorf_JV gene  2774  3307     .
## 764     contig_22895_317_466_+ contig_22895 getorf_JV gene   317   466     .
## 765   contig_22895_3498_3944_+ contig_22895 getorf_JV gene  3498  3944     .
## 766    contig_22895_442_2628_+ contig_22895 getorf_JV gene   442  2628     .
##     strand phase attributes seq_length     subject_id identity alignment_length
## 132  FALSE     1        150       1450 YP_009345650.1    0.938               49
## 133  FALSE     1        153       1450 YP_009345649.1    1.000               51
## 134  FALSE     1        186       1450 YP_009345648.1    1.000               62
## 197   TRUE     1        207       7285 YP_009345675.1    1.000               69
## 198  FALSE     1        174       7285 YP_009345674.1    1.000               58
## 199  FALSE     1        258       7285 YP_009345673.1    1.000               86
## 200   TRUE     1       1935       7285 YP_009345672.1    1.000              645
## 201  FALSE     1        255       7285 YP_009345678.1    0.825              103
## 202  FALSE     1        366       7285 YP_009345677.1    1.000              122
## 203   TRUE     1        927       7285 YP_009345671.1    1.000              309
## 204  FALSE     1       1512       7285 YP_009345670.1    1.000              504
## 205  FALSE     1        318       7285 YP_009345676.1    1.000              106
## 206  FALSE     1        156       7285 YP_009345669.1    1.000               52
## 246  FALSE     1       1869       7049 YP_009345711.1    0.992              625
## 247  FALSE     1       1185       7049 YP_009345712.1    1.000              395
## 248   TRUE     1       2055       7049 YP_009345710.1    1.000              685
## 249  FALSE     1        744       7049 YP_009345709.1    1.000              248
## 478   TRUE     1        186       1091 YP_009345648.1    1.000               62
## 479   TRUE     1        153       1091 YP_009345649.1    1.000               51
## 649   TRUE     1       1011      24107 YP_009345626.1    1.000              337
## 650  FALSE     1        177      24107 YP_009345616.1    1.000               59
## 651   TRUE     1       2337      24107 YP_009345615.1    1.000              779
## 652   TRUE     1        276      24107 YP_009345614.1    1.000               92
## 653  FALSE     1        318      24107 YP_009345613.1    1.000              106
## 654  FALSE     1        252      24107 YP_009345612.1    1.000               84
## 655  FALSE     1        780      24107 YP_009345611.1    1.000              260
## 656  FALSE     1        549      24107 YP_009345610.1    1.000              183
## 657  FALSE     1        447      24107 YP_009345628.1    1.000              149
## 658  FALSE     1       2085      24107 YP_009345609.1    0.998              696
## 659   TRUE     1       1014      24107 YP_009345608.1    1.000              338
## 660   TRUE     1        828      24107 YP_009345625.1    1.000              276
## 661  FALSE     1        156      24107 YP_009345607.1    1.000               52
## 662  FALSE     1       1002      24107 YP_009345606.1    1.000              334
## 663  FALSE     1        948      24107 YP_009345605.1    1.000              316
## 664   TRUE     1        171      24107 YP_009345630.1    0.791               24
## 665   TRUE     1       1146      24107 YP_009345624.1    1.000              382
## 666  FALSE     1        678      24107 YP_009345623.1    1.000              226
## 667  FALSE     1        699      24107 YP_009345622.1    1.000              233
## 668  FALSE     1        315      24107 YP_009345621.1    1.000              105
## 669  FALSE     1        531      24107 YP_009345619.1    1.000              177
## 670  FALSE     1        225      24107 YP_009345627.1    1.000               75
## 671  FALSE     1        897      24107 YP_009345618.1    1.000              299
## 672   TRUE     1       1731      24107 YP_009345617.1    1.000              576
## 673  FALSE     1        546      20889 YP_009345698.1    1.000              182
## 674  FALSE     1        210      20889 YP_009345697.1    1.000               70
## 675  FALSE     1       4779      20889 YP_009345696.1    1.000             1593
## 676   TRUE     1        576      20889 YP_009345707.1    1.000              192
## 677  FALSE     1        159      20889 YP_009345695.1    1.000               53
## 678  FALSE     1       1134      20889 YP_009345694.1    1.000              378
## 679   TRUE     1        615      20889 YP_009345693.1    1.000              205
## 680   TRUE     1       1080      20889 YP_009345706.1    1.000              360
## 681  FALSE     1        246      20889 YP_009345692.1    1.000               82
## 682  FALSE     1        528      20889 YP_009345691.1    1.000              176
## 683  FALSE     1        207      20889 YP_009345690.1    1.000               69
## 684  FALSE     1        645      20889 YP_009345689.1    1.000              215
## 685   TRUE     1        888      20889 YP_009345705.1    1.000              296
## 686  FALSE     1        669      20889 YP_009345708.1    1.000              223
## 687   TRUE     1        306      20889 YP_009345704.1    1.000              102
## 688  FALSE     1        918      20889 YP_009345703.1    1.000              306
## 689  FALSE     1        615      20889 YP_009345702.1    1.000              205
## 690  FALSE     1        870      20889 YP_009345701.1    1.000              290
## 691   TRUE     1       3144      20889 YP_009345700.1    1.000             1048
## 692  FALSE     1        213      17653 YP_009345638.1    1.000               71
## 693  FALSE     1       1194      17653 YP_009345637.1    1.000              398
## 694  FALSE     1       1683      17653 YP_009345636.1    0.998              561
## 695   TRUE     1        633      17653 YP_009345635.1    1.000              211
## 696  FALSE     1        468      17653 YP_009345634.1    1.000              156
## 697   TRUE     1        711      17653 YP_009345646.1    1.000              237
## 698   TRUE     1        666      17653 YP_009345633.1    1.000              222
## 699   TRUE     1        474      17653 YP_009345632.1    1.000              158
## 700  FALSE     1       1023      17653 YP_009345631.1    1.000              341
## 701   TRUE     1        150      17653 YP_009345645.1    1.000               50
## 702  FALSE     1        210      17653 YP_009345644.1    1.000               70
## 703   TRUE     1        264      17653 YP_009345643.1    0.988               88
## 704  FALSE     1        210      17653 YP_009345642.1    1.000               70
## 705  FALSE     1       1008      17653 YP_009345647.1    1.000              336
## 706   TRUE     1        696      17653 YP_009345641.1    1.000              232
## 707   TRUE     1       4836      17653 YP_009345640.1    0.999             1612
## 708  FALSE     1       1212      17653 YP_009345639.1    1.000              404
## 709   TRUE     1        336      11641 YP_009345654.1    1.000              112
## 710   TRUE     1       1086      11641 YP_009345664.1    1.000              362
## 711   TRUE     1        225      11641 YP_009345653.1    1.000               75
## 712   TRUE     1        648      11641 YP_009345663.1    1.000              216
## 713  FALSE     1       3924      11641 YP_009345662.1    1.000             1308
## 714  FALSE     1        150      11641 YP_009345661.1    1.000               50
## 715   TRUE     1        555      11641 YP_009345660.1    1.000              185
## 716  FALSE     1        594      11641 YP_009345659.1    1.000              198
## 717   TRUE     1        333      11641 YP_009345658.1    1.000              111
## 718   TRUE     1        252      11641 YP_009345657.1    1.000               84
## 719   TRUE     1       1119      11641 YP_009345656.1    1.000              373
## 720   TRUE     1       1425      11641 YP_009345655.1    0.951              499
## 732   TRUE     1        927       9985 YP_009345683.1    1.000              309
## 733  FALSE     1        234       9985 YP_009345681.1    1.000               78
## 734   TRUE     1       1053       9985 YP_009345684.1    1.000              351
## 735  FALSE     1       1608       9985 YP_009345685.1    1.000              536
## 736   TRUE     1        318       9985 YP_009345686.1    1.000              106
## 737   TRUE     1       1299       9985 YP_009345687.1    1.000              433
## 738  FALSE     1        447       9985 YP_009345628.1    1.000              149
## 739   TRUE     1       2028       9985 YP_009345682.1    1.000              676
## 740  FALSE     1        225       9985 YP_009345627.1    1.000               75
## 763   TRUE     1        534       3946 YP_009345666.1    1.000              178
## 764   TRUE     1        150       3946 YP_009345668.1    1.000               50
## 765   TRUE     1        447       3946 YP_009345665.1    1.000              149
## 766   TRUE     1       2187       3946 YP_009345667.1    0.998              729
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 132          3         0      1   49      1   49  2.198e-22       92
## 133          0         0      1   51      1   51  2.328e-26      104
## 134          0         0      1   62      1   62  1.779e-35      130
## 197          0         0      1   69      1   69  2.387e-38      139
## 198          0         0      1   58      1   58  6.934e-31      117
## 199          0         0      1   86      1   86  2.175e-49      172
## 200          0         0      1  645      1  645  0.000e+00     1326
## 201         15         0      1   85      1  103  2.046e-38      140
## 202          0         0      1  122      1  122  1.196e-72      241
## 203          0         0      1  309      1  309 4.807e-203      628
## 204          0         0      1  504      1  504  0.000e+00     1092
## 205          0         0      1  106      1  106  2.906e-66      222
## 206          0         0      1   52      1   52  4.308e-29      111
## 246          5         0      1  623      1  625  0.000e+00     1234
## 247          0         0      1  395      1  395 1.306e-236      730
## 248          0         0      1  685      1  685  0.000e+00     1406
## 249          0         0      1  248      1  248 6.623e-164      511
## 478          0         0      1   62      1   62  1.779e-35      130
## 479          0         0      1   51      1   51  2.328e-26      104
## 649          0         0      1  337      1  337 2.746e-213      659
## 650          0         0      1   59      1   59  9.222e-30      114
## 651          0         0      1  779      1  779  0.000e+00     1557
## 652          0         0      1   92      1   92  1.679e-57      196
## 653          0         0      1  106      1  106  6.555e-61      206
## 654          0         0      1   84      1   84  6.745e-45      159
## 655          0         0      1  260      1  260 3.213e-167      522
## 656          0         0      1  183      1  183 1.607e-105      339
## 657          0         0      1  149      1  149  7.276e-98      315
## 658          1         0      1  695      1  696  0.000e+00     1358
## 659          0         0      1  338      1  338 4.436e-224      690
## 660          0         0      1  276      1  276 7.814e-175      544
## 661          0         0      1   52      1   52  9.466e-27      105
## 662          0         0      1  334      1  334 5.990e-216      666
## 663          0         0      1  316      1  316 1.406e-213      659
## 664          5         0     23   46     39   62  4.492e-05       43
## 665          0         0      1  382      1  382 1.715e-258      792
## 666          0         0      1  226      1  226 3.943e-143      450
## 667          0         0      1  233      1  233 7.519e-143      450
## 668          0         0      1  105      1  105  3.630e-62      210
## 669          0         0      1  177      1  177  1.009e-93      305
## 670          0         0      1   75      1   75  2.350e-38      139
## 671          0         0      1  299      1  299 1.230e-197      612
## 672          0         0      1  576      1  576  0.000e+00     1163
## 673          0         0      1  182      1  182 1.141e-122      389
## 674          0         0      1   70      1   70  8.911e-38      138
## 675          0         0      1 1593      1 1593  0.000e+00     3186
## 676          0         0      1  192      1  192 1.111e-120      383
## 677          0         0      1   53      1   53  3.547e-26      103
## 678          0         0      1  378      1  378 3.323e-244      751
## 679          0         0      1  205      1  205 4.974e-126      400
## 680          0         0      1  360      1  360 1.576e-245      753
## 681          0         0      1   82      1   82  6.666e-46      162
## 682          0         0      1  176      1  176 1.115e-108      348
## 683          0         0      1   69      1   69  1.380e-39      143
## 684          0         0      1  215      1  215 2.196e-139      439
## 685          0         0      1  296      1  296 2.457e-186      579
## 686          0         0      1  223      1  223 3.763e-144      453
## 687          0         0      1  102      1  102  6.453e-63      212
## 688          0         0      1  306      1  306 1.280e-194      603
## 689          0         0      1  205      1  205 3.866e-130      411
## 690          0         0      1  290      1  290 3.392e-189      587
## 691          0         0      1 1048      1 1048  0.000e+00     2079
## 692          0         0      1   71      1   71  7.899e-36      132
## 693          0         0      1  398      1  398 9.802e-265      811
## 694          1         0      1  561      1  561  0.000e+00     1134
## 695          0         0      1  211      1  211 1.410e-133      422
## 696          0         0      1  156      1  156  1.533e-98      317
## 697          0         0      1  237      1  237 3.203e-142      448
## 698          0         0      1  222      1  222 1.080e-138      437
## 699          0         0      1  158      1  158  8.791e-92      298
## 700          0         0      1  341      1  341 6.779e-240      736
## 701          0         0      1   50      1   50  1.080e-25      102
## 702          0         0      1   70      1   70  1.640e-33      125
## 703          1         0      1   88      1   88  1.231e-45      161
## 704          0         0      1   70      1   70  1.679e-37      137
## 705          0         0      1  336      1  336 2.054e-217      671
## 706          0         0      1  232      1  232 7.006e-156      487
## 707          2         0      1 1612      1 1612  0.000e+00     3178
## 708          0         0      1  404      1  404 2.685e-265      813
## 709          0         0      1  112      1  112  2.482e-68      228
## 710          0         0      1  362      1  362 1.094e-237      731
## 711          0         0      1   75      1   75  1.284e-42      152
## 712          0         0      1  216      1  216 7.395e-138      434
## 713          0         0      1 1308      1 1308  0.000e+00     2644
## 714          0         0      1   50      1   50  3.574e-28      109
## 715          0         0      1  185      1  185 6.417e-112      358
## 716          0         0      1  198      1  198 2.604e-126      400
## 717          0         0      1  111      1  111  1.437e-59      203
## 718          0         0      1   84      1   84  9.579e-49      170
## 719          0         0      1  373      1  373 6.608e-247      758
## 720         23         0      1  475      1  499 2.161e-300      918
## 732          0         0      1  309      1  309 1.881e-190      591
## 733          0         0      1   78      1   78  5.174e-49      170
## 734          0         0      1  351      1  351 2.875e-223      689
## 735          0         0      1  536      1  536  0.000e+00     1064
## 736          0         0      1  106      1  106  1.233e-67      226
## 737          0         0      1  433      1  433 9.237e-280      856
## 738          0         0      1  149      1  149  7.276e-98      315
## 739          0         0      1  676      1  676  0.000e+00     1362
## 740          0         0      1   75      1   75  2.350e-38      139
## 763          0         0      1  178      1  178 4.563e-122      387
## 764          0         0      1   50      1   50  1.271e-27      107
## 765          0         0      1  149      1  149  1.179e-92      300
## 766          1         0      1  729      1  729  0.000e+00     1454
##                                                                                 annotation
## 132               hypothetical protein LbFV_ORF46 [Leptopilina boulardi filamentous virus]
## 133               hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 134               hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 197               hypothetical protein LbFV_ORF71 [Leptopilina boulardi filamentous virus]
## 198               hypothetical protein LbFV_ORF70 [Leptopilina boulardi filamentous virus]
## 199               hypothetical protein LbFV_ORF69 [Leptopilina boulardi filamentous virus]
## 200               hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 201               hypothetical protein LbFV_ORF74 [Leptopilina boulardi filamentous virus]
## 202               hypothetical protein LbFV_ORF73 [Leptopilina boulardi filamentous virus]
## 203               hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 204               putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 205               hypothetical protein LbFV_ORF72 [Leptopilina boulardi filamentous virus]
## 206               hypothetical protein LbFV_ORF65 [Leptopilina boulardi filamentous virus]
## 246              hypothetical protein LbFV_ORF107 [Leptopilina boulardi filamentous virus]
## 247              hypothetical protein LbFV_ORF108 [Leptopilina boulardi filamentous virus]
## 248                          putative ODV protein [Leptopilina boulardi filamentous virus]
## 249              hypothetical protein LbFV_ORF105 [Leptopilina boulardi filamentous virus]
## 478               hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 479               hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 649               hypothetical protein LbFV_ORF22 [Leptopilina boulardi filamentous virus]
## 650               hypothetical protein LbFV_ORF12 [Leptopilina boulardi filamentous virus]
## 651                           JmJC domain protein [Leptopilina boulardi filamentous virus]
## 652               hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 653                hypothetical protein LbFV_ORF9 [Leptopilina boulardi filamentous virus]
## 654                hypothetical protein LbFV_ORF8 [Leptopilina boulardi filamentous virus]
## 655                hypothetical protein LbFV_ORF7 [Leptopilina boulardi filamentous virus]
## 656                hypothetical protein LbFV_ORF6 [Leptopilina boulardi filamentous virus]
## 657               hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 658                hypothetical protein LbFV_ORF5 [Leptopilina boulardi filamentous virus]
## 659                hypothetical protein LbFV_ORF4 [Leptopilina boulardi filamentous virus]
## 660               hypothetical protein LbFV_ORF21 [Leptopilina boulardi filamentous virus]
## 661                hypothetical protein LbFV_ORF3 [Leptopilina boulardi filamentous virus]
## 662                hypothetical protein LbFV_ORF2 [Leptopilina boulardi filamentous virus]
## 663                hypothetical protein LbFV_ORF1 [Leptopilina boulardi filamentous virus]
## 664               hypothetical protein LbFV_ORF26 [Leptopilina boulardi filamentous virus]
## 665               hypothetical protein LbFV_ORF20 [Leptopilina boulardi filamentous virus]
## 666               hypothetical protein LbFV_ORF19 [Leptopilina boulardi filamentous virus]
## 667               hypothetical protein LbFV_ORF18 [Leptopilina boulardi filamentous virus]
## 668               hypothetical protein LbFV_ORF17 [Leptopilina boulardi filamentous virus]
## 669               hypothetical protein LbFV_ORF15 [Leptopilina boulardi filamentous virus]
## 670               hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 671                                    BRO family [Leptopilina boulardi filamentous virus]
## 672                           JmJC domain protein [Leptopilina boulardi filamentous virus]
## 673               hypothetical protein LbFV_ORF94 [Leptopilina boulardi filamentous virus]
## 674               hypothetical protein LbFV_ORF93 [Leptopilina boulardi filamentous virus]
## 675               hypothetical protein LbFV_ORF92 [Leptopilina boulardi filamentous virus]
## 676              hypothetical protein LbFV_ORF103 [Leptopilina boulardi filamentous virus]
## 677               hypothetical protein LbFV_ORF91 [Leptopilina boulardi filamentous virus]
## 678               hypothetical protein LbFV_ORF90 [Leptopilina boulardi filamentous virus]
## 679               hypothetical protein LbFV_ORF89 [Leptopilina boulardi filamentous virus]
## 680              hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 681               hypothetical protein LbFV_ORF88 [Leptopilina boulardi filamentous virus]
## 682               hypothetical protein LbFV_ORF87 [Leptopilina boulardi filamentous virus]
## 683               hypothetical protein LbFV_ORF86 [Leptopilina boulardi filamentous virus]
## 684               hypothetical protein LbFV_ORF85 [Leptopilina boulardi filamentous virus]
## 685                         MSV199 domain protein [Leptopilina boulardi filamentous virus]
## 686              hypothetical protein LbFV_ORF104 [Leptopilina boulardi filamentous virus]
## 687              hypothetical protein LbFV_ORF100 [Leptopilina boulardi filamentous virus]
## 688               hypothetical protein LbFV_ORF99 [Leptopilina boulardi filamentous virus]
## 689               hypothetical protein LbFV_ORF98 [Leptopilina boulardi filamentous virus]
## 690               hypothetical protein LbFV_ORF97 [Leptopilina boulardi filamentous virus]
## 691               hypothetical protein LbFV_ORF96 [Leptopilina boulardi filamentous virus]
## 692               hypothetical protein LbFV_ORF34 [Leptopilina boulardi filamentous virus]
## 693               hypothetical protein LbFV_ORF33 [Leptopilina boulardi filamentous virus]
## 694               hypothetical protein LbFV_ORF32 [Leptopilina boulardi filamentous virus]
## 695               hypothetical protein LbFV_ORF31 [Leptopilina boulardi filamentous virus]
## 696               hypothetical protein LbFV_ORF30 [Leptopilina boulardi filamentous virus]
## 697               hypothetical protein LbFV_ORF42 [Leptopilina boulardi filamentous virus]
## 698               hypothetical protein LbFV_ORF29 [Leptopilina boulardi filamentous virus]
## 699               hypothetical protein LbFV_ORF28 [Leptopilina boulardi filamentous virus]
## 700               putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 701               hypothetical protein LbFV_ORF41 [Leptopilina boulardi filamentous virus]
## 702               hypothetical protein LbFV_ORF40 [Leptopilina boulardi filamentous virus]
## 703               hypothetical protein LbFV_ORF39 [Leptopilina boulardi filamentous virus]
## 704               hypothetical protein LbFV_ORF38 [Leptopilina boulardi filamentous virus]
## 705               hypothetical protein LbFV_ORF43 [Leptopilina boulardi filamentous virus]
## 706                          nudix domain protein [Leptopilina boulardi filamentous virus]
## 707               hypothetical protein LbFV_ORF36 [Leptopilina boulardi filamentous virus]
## 708               hypothetical protein LbFV_ORF35 [Leptopilina boulardi filamentous virus]
## 709               putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 710 putative lecithin:cholesterol acyltransferase [Leptopilina boulardi filamentous virus]
## 711               putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 712               hypothetical protein LbFV_ORF59 [Leptopilina boulardi filamentous virus]
## 713                              putative DNA pol [Leptopilina boulardi filamentous virus]
## 714               hypothetical protein LbFV_ORF57 [Leptopilina boulardi filamentous virus]
## 715               hypothetical protein LbFV_ORF56 [Leptopilina boulardi filamentous virus]
## 716               hypothetical protein LbFV_ORF55 [Leptopilina boulardi filamentous virus]
## 717               hypothetical protein LbFV_ORF54 [Leptopilina boulardi filamentous virus]
## 718               hypothetical protein LbFV_ORF53 [Leptopilina boulardi filamentous virus]
## 719               hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 720               hypothetical protein LbFV_ORF51 [Leptopilina boulardi filamentous virus]
## 732               hypothetical protein LbFV_ORF79 [Leptopilina boulardi filamentous virus]
## 733               hypothetical protein LbFV_ORF77 [Leptopilina boulardi filamentous virus]
## 734               hypothetical protein LbFV_ORF80 [Leptopilina boulardi filamentous virus]
## 735                               putative ATPase [Leptopilina boulardi filamentous virus]
## 736               hypothetical protein LbFV_ORF82 [Leptopilina boulardi filamentous virus]
## 737               hypothetical protein LbFV_ORF83 [Leptopilina boulardi filamentous virus]
## 738               hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 739               hypothetical protein LbFV_ORF78 [Leptopilina boulardi filamentous virus]
## 740               hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 763               hypothetical protein LbFV_ORF62 [Leptopilina boulardi filamentous virus]
## 764               hypothetical protein LbFV_ORF64 [Leptopilina boulardi filamentous virus]
## 765                            mucin-like protein [Leptopilina boulardi filamentous virus]
## 766               hypothetical protein LbFV_ORF63 [Leptopilina boulardi filamentous virus]
## Saving 7 x 5 in image
# Display the first element of `res`. Single-bracket `[` keeps the list
# wrapper, which is why the output is labelled `[[1]]`; `res[[1]]` would
# return the bare element instead.
# NOTE(review): nothing textual is printed under `[[1]]` -- presumably this
# element is a plot that is rendered as a figure; confirm against where
# `res` is built.
res[1]
## [[1]]

# Display the second element of `res`, still wrapped in a one-element list
# (hence the `[[1]]` label). The printed value is a wide data frame with one
# row per ORF (orf_name, seqid, start/end, alignment statistics, annotation);
# the console wraps it into several column groups.
# NOTE(review): in the output `strand` prints as TRUE/FALSE and `attributes`
# holds numbers that match end - start + 1 -- looks like the column names or
# parsing are off where this table is built; verify upstream.
res[2]
## [[1]]
##                       orf_name        seqid    source type start   end score
## 132       contig_12283_3_152_- contig_12283 getorf_JV gene     3   152     .
## 133     contig_12283_387_539_- contig_12283 getorf_JV gene   387   539     .
## 134     contig_12283_693_878_- contig_12283 getorf_JV gene   693   878     .
## 197    contig_1350_1158_1364_+  contig_1350 getorf_JV gene  1158  1364     .
## 198    contig_1350_1472_1645_-  contig_1350 getorf_JV gene  1472  1645     .
## 199    contig_1350_1824_2081_-  contig_1350 getorf_JV gene  1824  2081     .
## 200    contig_1350_2089_4023_+  contig_1350 getorf_JV gene  2089  4023     .
## 201        contig_1350_3_257_-  contig_1350 getorf_JV gene     3   257     .
## 202      contig_1350_315_680_-  contig_1350 getorf_JV gene   315   680     .
## 203    contig_1350_4200_5126_+  contig_1350 getorf_JV gene  4200  5126     .
## 204    contig_1350_5229_6740_-  contig_1350 getorf_JV gene  5229  6740     .
## 205      contig_1350_680_997_-  contig_1350 getorf_JV gene   680   997     .
## 206    contig_1350_6947_7102_-  contig_1350 getorf_JV gene  6947  7102     .
## 246    contig_1505_1234_3102_-  contig_1505 getorf_JV gene  1234  3102     .
## 247       contig_1505_2_1186_-  contig_1505 getorf_JV gene     2  1186     .
## 248    contig_1505_3572_5626_+  contig_1505 getorf_JV gene  3572  5626     .
## 249    contig_1505_5654_6397_-  contig_1505 getorf_JV gene  5654  6397     .
## 478     contig_19307_248_433_+ contig_19307 getorf_JV gene   248   433     .
## 479     contig_19307_587_739_+ contig_19307 getorf_JV gene   587   739     .
## 649   contig_22345_1017_2027_+ contig_22345 getorf_JV gene  1017  2027     .
## 650 contig_22345_12095_12271_- contig_22345 getorf_JV gene 12095 12271     .
## 651 contig_22345_12686_15022_+ contig_22345 getorf_JV gene 12686 15022     .
## 652 contig_22345_15388_15663_+ contig_22345 getorf_JV gene 15388 15663     .
## 653 contig_22345_15826_16143_- contig_22345 getorf_JV gene 15826 16143     .
## 654 contig_22345_16234_16485_- contig_22345 getorf_JV gene 16234 16485     .
## 655 contig_22345_16630_17409_- contig_22345 getorf_JV gene 16630 17409     .
## 656 contig_22345_17417_17965_- contig_22345 getorf_JV gene 17417 17965     .
## 657     contig_22345_179_625_- contig_22345 getorf_JV gene   179   625     .
## 658 contig_22345_18010_20094_- contig_22345 getorf_JV gene 18010 20094     .
## 659 contig_22345_20331_21344_+ contig_22345 getorf_JV gene 20331 21344     .
## 660   contig_22345_2055_2882_+ contig_22345 getorf_JV gene  2055  2882     .
## 661 contig_22345_21392_21547_- contig_22345 getorf_JV gene 21392 21547     .
## 662 contig_22345_21617_22618_- contig_22345 getorf_JV gene 21617 22618     .
## 663 contig_22345_22700_23647_- contig_22345 getorf_JV gene 22700 23647     .
## 664 contig_22345_23792_23962_+ contig_22345 getorf_JV gene 23792 23962     .
## 665   contig_22345_2958_4103_+ contig_22345 getorf_JV gene  2958  4103     .
## 666   contig_22345_4521_5198_- contig_22345 getorf_JV gene  4521  5198     .
## 667   contig_22345_5208_5906_- contig_22345 getorf_JV gene  5208  5906     .
## 668   contig_22345_5931_6245_- contig_22345 getorf_JV gene  5931  6245     .
## 669   contig_22345_6770_7300_- contig_22345 getorf_JV gene  6770  7300     .
## 670     contig_22345_694_918_- contig_22345 getorf_JV gene   694   918     .
## 671   contig_22345_7624_8520_- contig_22345 getorf_JV gene  7624  8520     .
## 672  contig_22345_9649_11379_+ contig_22345 getorf_JV gene  9649 11379     .
## 673 contig_22365_10325_10870_- contig_22365 getorf_JV gene 10325 10870     .
## 674 contig_22365_10956_11165_- contig_22365 getorf_JV gene 10956 11165     .
## 675 contig_22365_11308_16086_- contig_22365 getorf_JV gene 11308 16086     .
## 676   contig_22365_1219_1794_+ contig_22365 getorf_JV gene  1219  1794     .
## 677 contig_22365_16399_16557_- contig_22365 getorf_JV gene 16399 16557     .
## 678 contig_22365_16643_17776_- contig_22365 getorf_JV gene 16643 17776     .
## 679 contig_22365_18040_18654_+ contig_22365 getorf_JV gene 18040 18654     .
## 680   contig_22365_1842_2921_+ contig_22365 getorf_JV gene  1842  2921     .
## 681 contig_22365_18779_19024_- contig_22365 getorf_JV gene 18779 19024     .
## 682 contig_22365_19027_19554_- contig_22365 getorf_JV gene 19027 19554     .
## 683 contig_22365_19642_19848_- contig_22365 getorf_JV gene 19642 19848     .
## 684 contig_22365_19829_20473_- contig_22365 getorf_JV gene 19829 20473     .
## 685   contig_22365_3229_4116_+ contig_22365 getorf_JV gene  3229  4116     .
## 686    contig_22365_356_1024_- contig_22365 getorf_JV gene   356  1024     .
## 687   contig_22365_4145_4450_+ contig_22365 getorf_JV gene  4145  4450     .
## 688   contig_22365_4548_5465_- contig_22365 getorf_JV gene  4548  5465     .
## 689   contig_22365_5498_6112_- contig_22365 getorf_JV gene  5498  6112     .
## 690   contig_22365_6127_6996_- contig_22365 getorf_JV gene  6127  6996     .
## 691  contig_22365_7069_10212_+ contig_22365 getorf_JV gene  7069 10212     .
## 692 contig_22381_10447_10659_- contig_22381 getorf_JV gene 10447 10659     .
## 693 contig_22381_10655_11848_- contig_22381 getorf_JV gene 10655 11848     .
## 694 contig_22381_12147_13829_- contig_22381 getorf_JV gene 12147 13829     .
## 695 contig_22381_13930_14562_+ contig_22381 getorf_JV gene 13930 14562     .
## 696 contig_22381_14583_15050_- contig_22381 getorf_JV gene 14583 15050     .
## 697   contig_22381_1502_2212_+ contig_22381 getorf_JV gene  1502  2212     .
## 698 contig_22381_15095_15760_+ contig_22381 getorf_JV gene 15095 15760     .
## 699 contig_22381_15756_16229_+ contig_22381 getorf_JV gene 15756 16229     .
## 700 contig_22381_16303_17325_- contig_22381 getorf_JV gene 16303 17325     .
## 701   contig_22381_2291_2440_+ contig_22381 getorf_JV gene  2291  2440     .
## 702   contig_22381_2686_2895_- contig_22381 getorf_JV gene  2686  2895     .
## 703   contig_22381_2915_3178_+ contig_22381 getorf_JV gene  2915  3178     .
## 704   contig_22381_3197_3406_- contig_22381 getorf_JV gene  3197  3406     .
## 705    contig_22381_322_1329_- contig_22381 getorf_JV gene   322  1329     .
## 706   contig_22381_3527_4222_+ contig_22381 getorf_JV gene  3527  4222     .
## 707   contig_22381_4311_9146_+ contig_22381 getorf_JV gene  4311  9146     .
## 708  contig_22381_9132_10343_- contig_22381 getorf_JV gene  9132 10343     .
## 709 contig_22449_11022_11357_+ contig_22449 getorf_JV gene 11022 11357     .
## 710    contig_22449_113_1198_+ contig_22449 getorf_JV gene   113  1198     .
## 711 contig_22449_11417_11641_+ contig_22449 getorf_JV gene 11417 11641     .
## 712   contig_22449_1235_1882_+ contig_22449 getorf_JV gene  1235  1882     .
## 713   contig_22449_1927_5850_- contig_22449 getorf_JV gene  1927  5850     .
## 714   contig_22449_5834_5983_- contig_22449 getorf_JV gene  5834  5983     .
## 715   contig_22449_5967_6521_+ contig_22449 getorf_JV gene  5967  6521     .
## 716   contig_22449_6524_7117_- contig_22449 getorf_JV gene  6524  7117     .
## 717   contig_22449_7138_7470_+ contig_22449 getorf_JV gene  7138  7470     .
## 718   contig_22449_7535_7786_+ contig_22449 getorf_JV gene  7535  7786     .
## 719   contig_22449_7877_8995_+ contig_22449 getorf_JV gene  7877  8995     .
## 720  contig_22449_8985_10409_+ contig_22449 getorf_JV gene  8985 10409     .
## 732   contig_22533_3206_4132_+ contig_22533 getorf_JV gene  3206  4132     .
## 733     contig_22533_383_616_- contig_22533 getorf_JV gene   383   616     .
## 734   contig_22533_4132_5184_+ contig_22533 getorf_JV gene  4132  5184     .
## 735   contig_22533_5193_6800_- contig_22533 getorf_JV gene  5193  6800     .
## 736   contig_22533_7018_7335_+ contig_22533 getorf_JV gene  7018  7335     .
## 737   contig_22533_7389_8687_+ contig_22533 getorf_JV gene  7389  8687     .
## 738   contig_22533_9105_9551_- contig_22533 getorf_JV gene  9105  9551     .
## 739    contig_22533_944_2971_+ contig_22533 getorf_JV gene   944  2971     .
## 740   contig_22533_9620_9844_- contig_22533 getorf_JV gene  9620  9844     .
## 763   contig_22895_2774_3307_+ contig_22895 getorf_JV gene  2774  3307     .
## 764     contig_22895_317_466_+ contig_22895 getorf_JV gene   317   466     .
## 765   contig_22895_3498_3944_+ contig_22895 getorf_JV gene  3498  3944     .
## 766    contig_22895_442_2628_+ contig_22895 getorf_JV gene   442  2628     .
##     strand phase attributes seq_length     subject_id identity alignment_length
## 132  FALSE     1        150       1450 YP_009345650.1    0.938               49
## 133  FALSE     1        153       1450 YP_009345649.1    1.000               51
## 134  FALSE     1        186       1450 YP_009345648.1    1.000               62
## 197   TRUE     1        207       7285 YP_009345675.1    1.000               69
## 198  FALSE     1        174       7285 YP_009345674.1    1.000               58
## 199  FALSE     1        258       7285 YP_009345673.1    1.000               86
## 200   TRUE     1       1935       7285 YP_009345672.1    1.000              645
## 201  FALSE     1        255       7285 YP_009345678.1    0.825              103
## 202  FALSE     1        366       7285 YP_009345677.1    1.000              122
## 203   TRUE     1        927       7285 YP_009345671.1    1.000              309
## 204  FALSE     1       1512       7285 YP_009345670.1    1.000              504
## 205  FALSE     1        318       7285 YP_009345676.1    1.000              106
## 206  FALSE     1        156       7285 YP_009345669.1    1.000               52
## 246  FALSE     1       1869       7049 YP_009345711.1    0.992              625
## 247  FALSE     1       1185       7049 YP_009345712.1    1.000              395
## 248   TRUE     1       2055       7049 YP_009345710.1    1.000              685
## 249  FALSE     1        744       7049 YP_009345709.1    1.000              248
## 478   TRUE     1        186       1091 YP_009345648.1    1.000               62
## 479   TRUE     1        153       1091 YP_009345649.1    1.000               51
## 649   TRUE     1       1011      24107 YP_009345626.1    1.000              337
## 650  FALSE     1        177      24107 YP_009345616.1    1.000               59
## 651   TRUE     1       2337      24107 YP_009345615.1    1.000              779
## 652   TRUE     1        276      24107 YP_009345614.1    1.000               92
## 653  FALSE     1        318      24107 YP_009345613.1    1.000              106
## 654  FALSE     1        252      24107 YP_009345612.1    1.000               84
## 655  FALSE     1        780      24107 YP_009345611.1    1.000              260
## 656  FALSE     1        549      24107 YP_009345610.1    1.000              183
## 657  FALSE     1        447      24107 YP_009345628.1    1.000              149
## 658  FALSE     1       2085      24107 YP_009345609.1    0.998              696
## 659   TRUE     1       1014      24107 YP_009345608.1    1.000              338
## 660   TRUE     1        828      24107 YP_009345625.1    1.000              276
## 661  FALSE     1        156      24107 YP_009345607.1    1.000               52
## 662  FALSE     1       1002      24107 YP_009345606.1    1.000              334
## 663  FALSE     1        948      24107 YP_009345605.1    1.000              316
## 664   TRUE     1        171      24107 YP_009345630.1    0.791               24
## 665   TRUE     1       1146      24107 YP_009345624.1    1.000              382
## 666  FALSE     1        678      24107 YP_009345623.1    1.000              226
## 667  FALSE     1        699      24107 YP_009345622.1    1.000              233
## 668  FALSE     1        315      24107 YP_009345621.1    1.000              105
## 669  FALSE     1        531      24107 YP_009345619.1    1.000              177
## 670  FALSE     1        225      24107 YP_009345627.1    1.000               75
## 671  FALSE     1        897      24107 YP_009345618.1    1.000              299
## 672   TRUE     1       1731      24107 YP_009345617.1    1.000              576
## 673  FALSE     1        546      20889 YP_009345698.1    1.000              182
## 674  FALSE     1        210      20889 YP_009345697.1    1.000               70
## 675  FALSE     1       4779      20889 YP_009345696.1    1.000             1593
## 676   TRUE     1        576      20889 YP_009345707.1    1.000              192
## 677  FALSE     1        159      20889 YP_009345695.1    1.000               53
## 678  FALSE     1       1134      20889 YP_009345694.1    1.000              378
## 679   TRUE     1        615      20889 YP_009345693.1    1.000              205
## 680   TRUE     1       1080      20889 YP_009345706.1    1.000              360
## 681  FALSE     1        246      20889 YP_009345692.1    1.000               82
## 682  FALSE     1        528      20889 YP_009345691.1    1.000              176
## 683  FALSE     1        207      20889 YP_009345690.1    1.000               69
## 684  FALSE     1        645      20889 YP_009345689.1    1.000              215
## 685   TRUE     1        888      20889 YP_009345705.1    1.000              296
## 686  FALSE     1        669      20889 YP_009345708.1    1.000              223
## 687   TRUE     1        306      20889 YP_009345704.1    1.000              102
## 688  FALSE     1        918      20889 YP_009345703.1    1.000              306
## 689  FALSE     1        615      20889 YP_009345702.1    1.000              205
## 690  FALSE     1        870      20889 YP_009345701.1    1.000              290
## 691   TRUE     1       3144      20889 YP_009345700.1    1.000             1048
## 692  FALSE     1        213      17653 YP_009345638.1    1.000               71
## 693  FALSE     1       1194      17653 YP_009345637.1    1.000              398
## 694  FALSE     1       1683      17653 YP_009345636.1    0.998              561
## 695   TRUE     1        633      17653 YP_009345635.1    1.000              211
## 696  FALSE     1        468      17653 YP_009345634.1    1.000              156
## 697   TRUE     1        711      17653 YP_009345646.1    1.000              237
## 698   TRUE     1        666      17653 YP_009345633.1    1.000              222
## 699   TRUE     1        474      17653 YP_009345632.1    1.000              158
## 700  FALSE     1       1023      17653 YP_009345631.1    1.000              341
## 701   TRUE     1        150      17653 YP_009345645.1    1.000               50
## 702  FALSE     1        210      17653 YP_009345644.1    1.000               70
## 703   TRUE     1        264      17653 YP_009345643.1    0.988               88
## 704  FALSE     1        210      17653 YP_009345642.1    1.000               70
## 705  FALSE     1       1008      17653 YP_009345647.1    1.000              336
## 706   TRUE     1        696      17653 YP_009345641.1    1.000              232
## 707   TRUE     1       4836      17653 YP_009345640.1    0.999             1612
## 708  FALSE     1       1212      17653 YP_009345639.1    1.000              404
## 709   TRUE     1        336      11641 YP_009345654.1    1.000              112
## 710   TRUE     1       1086      11641 YP_009345664.1    1.000              362
## 711   TRUE     1        225      11641 YP_009345653.1    1.000               75
## 712   TRUE     1        648      11641 YP_009345663.1    1.000              216
## 713  FALSE     1       3924      11641 YP_009345662.1    1.000             1308
## 714  FALSE     1        150      11641 YP_009345661.1    1.000               50
## 715   TRUE     1        555      11641 YP_009345660.1    1.000              185
## 716  FALSE     1        594      11641 YP_009345659.1    1.000              198
## 717   TRUE     1        333      11641 YP_009345658.1    1.000              111
## 718   TRUE     1        252      11641 YP_009345657.1    1.000               84
## 719   TRUE     1       1119      11641 YP_009345656.1    1.000              373
## 720   TRUE     1       1425      11641 YP_009345655.1    0.951              499
## 732   TRUE     1        927       9985 YP_009345683.1    1.000              309
## 733  FALSE     1        234       9985 YP_009345681.1    1.000               78
## 734   TRUE     1       1053       9985 YP_009345684.1    1.000              351
## 735  FALSE     1       1608       9985 YP_009345685.1    1.000              536
## 736   TRUE     1        318       9985 YP_009345686.1    1.000              106
## 737   TRUE     1       1299       9985 YP_009345687.1    1.000              433
## 738  FALSE     1        447       9985 YP_009345628.1    1.000              149
## 739   TRUE     1       2028       9985 YP_009345682.1    1.000              676
## 740  FALSE     1        225       9985 YP_009345627.1    1.000               75
## 763   TRUE     1        534       3946 YP_009345666.1    1.000              178
## 764   TRUE     1        150       3946 YP_009345668.1    1.000               50
## 765   TRUE     1        447       3946 YP_009345665.1    1.000              149
## 766   TRUE     1       2187       3946 YP_009345667.1    0.998              729
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 132          3         0      1   49      1   49  2.198e-22       92
## 133          0         0      1   51      1   51  2.328e-26      104
## 134          0         0      1   62      1   62  1.779e-35      130
## 197          0         0      1   69      1   69  2.387e-38      139
## 198          0         0      1   58      1   58  6.934e-31      117
## 199          0         0      1   86      1   86  2.175e-49      172
## 200          0         0      1  645      1  645  0.000e+00     1326
## 201         15         0      1   85      1  103  2.046e-38      140
## 202          0         0      1  122      1  122  1.196e-72      241
## 203          0         0      1  309      1  309 4.807e-203      628
## 204          0         0      1  504      1  504  0.000e+00     1092
## 205          0         0      1  106      1  106  2.906e-66      222
## 206          0         0      1   52      1   52  4.308e-29      111
## 246          5         0      1  623      1  625  0.000e+00     1234
## 247          0         0      1  395      1  395 1.306e-236      730
## 248          0         0      1  685      1  685  0.000e+00     1406
## 249          0         0      1  248      1  248 6.623e-164      511
## 478          0         0      1   62      1   62  1.779e-35      130
## 479          0         0      1   51      1   51  2.328e-26      104
## 649          0         0      1  337      1  337 2.746e-213      659
## 650          0         0      1   59      1   59  9.222e-30      114
## 651          0         0      1  779      1  779  0.000e+00     1557
## 652          0         0      1   92      1   92  1.679e-57      196
## 653          0         0      1  106      1  106  6.555e-61      206
## 654          0         0      1   84      1   84  6.745e-45      159
## 655          0         0      1  260      1  260 3.213e-167      522
## 656          0         0      1  183      1  183 1.607e-105      339
## 657          0         0      1  149      1  149  7.276e-98      315
## 658          1         0      1  695      1  696  0.000e+00     1358
## 659          0         0      1  338      1  338 4.436e-224      690
## 660          0         0      1  276      1  276 7.814e-175      544
## 661          0         0      1   52      1   52  9.466e-27      105
## 662          0         0      1  334      1  334 5.990e-216      666
## 663          0         0      1  316      1  316 1.406e-213      659
## 664          5         0     23   46     39   62  4.492e-05       43
## 665          0         0      1  382      1  382 1.715e-258      792
## 666          0         0      1  226      1  226 3.943e-143      450
## 667          0         0      1  233      1  233 7.519e-143      450
## 668          0         0      1  105      1  105  3.630e-62      210
## 669          0         0      1  177      1  177  1.009e-93      305
## 670          0         0      1   75      1   75  2.350e-38      139
## 671          0         0      1  299      1  299 1.230e-197      612
## 672          0         0      1  576      1  576  0.000e+00     1163
## 673          0         0      1  182      1  182 1.141e-122      389
## 674          0         0      1   70      1   70  8.911e-38      138
## 675          0         0      1 1593      1 1593  0.000e+00     3186
## 676          0         0      1  192      1  192 1.111e-120      383
## 677          0         0      1   53      1   53  3.547e-26      103
## 678          0         0      1  378      1  378 3.323e-244      751
## 679          0         0      1  205      1  205 4.974e-126      400
## 680          0         0      1  360      1  360 1.576e-245      753
## 681          0         0      1   82      1   82  6.666e-46      162
## 682          0         0      1  176      1  176 1.115e-108      348
## 683          0         0      1   69      1   69  1.380e-39      143
## 684          0         0      1  215      1  215 2.196e-139      439
## 685          0         0      1  296      1  296 2.457e-186      579
## 686          0         0      1  223      1  223 3.763e-144      453
## 687          0         0      1  102      1  102  6.453e-63      212
## 688          0         0      1  306      1  306 1.280e-194      603
## 689          0         0      1  205      1  205 3.866e-130      411
## 690          0         0      1  290      1  290 3.392e-189      587
## 691          0         0      1 1048      1 1048  0.000e+00     2079
## 692          0         0      1   71      1   71  7.899e-36      132
## 693          0         0      1  398      1  398 9.802e-265      811
## 694          1         0      1  561      1  561  0.000e+00     1134
## 695          0         0      1  211      1  211 1.410e-133      422
## 696          0         0      1  156      1  156  1.533e-98      317
## 697          0         0      1  237      1  237 3.203e-142      448
## 698          0         0      1  222      1  222 1.080e-138      437
## 699          0         0      1  158      1  158  8.791e-92      298
## 700          0         0      1  341      1  341 6.779e-240      736
## 701          0         0      1   50      1   50  1.080e-25      102
## 702          0         0      1   70      1   70  1.640e-33      125
## 703          1         0      1   88      1   88  1.231e-45      161
## 704          0         0      1   70      1   70  1.679e-37      137
## 705          0         0      1  336      1  336 2.054e-217      671
## 706          0         0      1  232      1  232 7.006e-156      487
## 707          2         0      1 1612      1 1612  0.000e+00     3178
## 708          0         0      1  404      1  404 2.685e-265      813
## 709          0         0      1  112      1  112  2.482e-68      228
## 710          0         0      1  362      1  362 1.094e-237      731
## 711          0         0      1   75      1   75  1.284e-42      152
## 712          0         0      1  216      1  216 7.395e-138      434
## 713          0         0      1 1308      1 1308  0.000e+00     2644
## 714          0         0      1   50      1   50  3.574e-28      109
## 715          0         0      1  185      1  185 6.417e-112      358
## 716          0         0      1  198      1  198 2.604e-126      400
## 717          0         0      1  111      1  111  1.437e-59      203
## 718          0         0      1   84      1   84  9.579e-49      170
## 719          0         0      1  373      1  373 6.608e-247      758
## 720         23         0      1  475      1  499 2.161e-300      918
## 732          0         0      1  309      1  309 1.881e-190      591
## 733          0         0      1   78      1   78  5.174e-49      170
## 734          0         0      1  351      1  351 2.875e-223      689
## 735          0         0      1  536      1  536  0.000e+00     1064
## 736          0         0      1  106      1  106  1.233e-67      226
## 737          0         0      1  433      1  433 9.237e-280      856
## 738          0         0      1  149      1  149  7.276e-98      315
## 739          0         0      1  676      1  676  0.000e+00     1362
## 740          0         0      1   75      1   75  2.350e-38      139
## 763          0         0      1  178      1  178 4.563e-122      387
## 764          0         0      1   50      1   50  1.271e-27      107
## 765          0         0      1  149      1  149  1.179e-92      300
## 766          1         0      1  729      1  729  0.000e+00     1454
##                                                                                 annotation
## 132               hypothetical protein LbFV_ORF46 [Leptopilina boulardi filamentous virus]
## 133               hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 134               hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 197               hypothetical protein LbFV_ORF71 [Leptopilina boulardi filamentous virus]
## 198               hypothetical protein LbFV_ORF70 [Leptopilina boulardi filamentous virus]
## 199               hypothetical protein LbFV_ORF69 [Leptopilina boulardi filamentous virus]
## 200               hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 201               hypothetical protein LbFV_ORF74 [Leptopilina boulardi filamentous virus]
## 202               hypothetical protein LbFV_ORF73 [Leptopilina boulardi filamentous virus]
## 203               hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 204               putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 205               hypothetical protein LbFV_ORF72 [Leptopilina boulardi filamentous virus]
## 206               hypothetical protein LbFV_ORF65 [Leptopilina boulardi filamentous virus]
## 246              hypothetical protein LbFV_ORF107 [Leptopilina boulardi filamentous virus]
## 247              hypothetical protein LbFV_ORF108 [Leptopilina boulardi filamentous virus]
## 248                          putative ODV protein [Leptopilina boulardi filamentous virus]
## 249              hypothetical protein LbFV_ORF105 [Leptopilina boulardi filamentous virus]
## 478               hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 479               hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 649               hypothetical protein LbFV_ORF22 [Leptopilina boulardi filamentous virus]
## 650               hypothetical protein LbFV_ORF12 [Leptopilina boulardi filamentous virus]
## 651                           JmJC domain protein [Leptopilina boulardi filamentous virus]
## 652               hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 653                hypothetical protein LbFV_ORF9 [Leptopilina boulardi filamentous virus]
## 654                hypothetical protein LbFV_ORF8 [Leptopilina boulardi filamentous virus]
## 655                hypothetical protein LbFV_ORF7 [Leptopilina boulardi filamentous virus]
## 656                hypothetical protein LbFV_ORF6 [Leptopilina boulardi filamentous virus]
## 657               hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 658                hypothetical protein LbFV_ORF5 [Leptopilina boulardi filamentous virus]
## 659                hypothetical protein LbFV_ORF4 [Leptopilina boulardi filamentous virus]
## 660               hypothetical protein LbFV_ORF21 [Leptopilina boulardi filamentous virus]
## 661                hypothetical protein LbFV_ORF3 [Leptopilina boulardi filamentous virus]
## 662                hypothetical protein LbFV_ORF2 [Leptopilina boulardi filamentous virus]
## 663                hypothetical protein LbFV_ORF1 [Leptopilina boulardi filamentous virus]
## 664               hypothetical protein LbFV_ORF26 [Leptopilina boulardi filamentous virus]
## 665               hypothetical protein LbFV_ORF20 [Leptopilina boulardi filamentous virus]
## 666               hypothetical protein LbFV_ORF19 [Leptopilina boulardi filamentous virus]
## 667               hypothetical protein LbFV_ORF18 [Leptopilina boulardi filamentous virus]
## 668               hypothetical protein LbFV_ORF17 [Leptopilina boulardi filamentous virus]
## 669               hypothetical protein LbFV_ORF15 [Leptopilina boulardi filamentous virus]
## 670               hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 671                                    BRO family [Leptopilina boulardi filamentous virus]
## 672                           JmJC domain protein [Leptopilina boulardi filamentous virus]
## 673               hypothetical protein LbFV_ORF94 [Leptopilina boulardi filamentous virus]
## 674               hypothetical protein LbFV_ORF93 [Leptopilina boulardi filamentous virus]
## 675               hypothetical protein LbFV_ORF92 [Leptopilina boulardi filamentous virus]
## 676              hypothetical protein LbFV_ORF103 [Leptopilina boulardi filamentous virus]
## 677               hypothetical protein LbFV_ORF91 [Leptopilina boulardi filamentous virus]
## 678               hypothetical protein LbFV_ORF90 [Leptopilina boulardi filamentous virus]
## 679               hypothetical protein LbFV_ORF89 [Leptopilina boulardi filamentous virus]
## 680              hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 681               hypothetical protein LbFV_ORF88 [Leptopilina boulardi filamentous virus]
## 682               hypothetical protein LbFV_ORF87 [Leptopilina boulardi filamentous virus]
## 683               hypothetical protein LbFV_ORF86 [Leptopilina boulardi filamentous virus]
## 684               hypothetical protein LbFV_ORF85 [Leptopilina boulardi filamentous virus]
## 685                         MSV199 domain protein [Leptopilina boulardi filamentous virus]
## 686              hypothetical protein LbFV_ORF104 [Leptopilina boulardi filamentous virus]
## 687              hypothetical protein LbFV_ORF100 [Leptopilina boulardi filamentous virus]
## 688               hypothetical protein LbFV_ORF99 [Leptopilina boulardi filamentous virus]
## 689               hypothetical protein LbFV_ORF98 [Leptopilina boulardi filamentous virus]
## 690               hypothetical protein LbFV_ORF97 [Leptopilina boulardi filamentous virus]
## 691               hypothetical protein LbFV_ORF96 [Leptopilina boulardi filamentous virus]
## 692               hypothetical protein LbFV_ORF34 [Leptopilina boulardi filamentous virus]
## 693               hypothetical protein LbFV_ORF33 [Leptopilina boulardi filamentous virus]
## 694               hypothetical protein LbFV_ORF32 [Leptopilina boulardi filamentous virus]
## 695               hypothetical protein LbFV_ORF31 [Leptopilina boulardi filamentous virus]
## 696               hypothetical protein LbFV_ORF30 [Leptopilina boulardi filamentous virus]
## 697               hypothetical protein LbFV_ORF42 [Leptopilina boulardi filamentous virus]
## 698               hypothetical protein LbFV_ORF29 [Leptopilina boulardi filamentous virus]
## 699               hypothetical protein LbFV_ORF28 [Leptopilina boulardi filamentous virus]
## 700               putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 701               hypothetical protein LbFV_ORF41 [Leptopilina boulardi filamentous virus]
## 702               hypothetical protein LbFV_ORF40 [Leptopilina boulardi filamentous virus]
## 703               hypothetical protein LbFV_ORF39 [Leptopilina boulardi filamentous virus]
## 704               hypothetical protein LbFV_ORF38 [Leptopilina boulardi filamentous virus]
## 705               hypothetical protein LbFV_ORF43 [Leptopilina boulardi filamentous virus]
## 706                          nudix domain protein [Leptopilina boulardi filamentous virus]
## 707               hypothetical protein LbFV_ORF36 [Leptopilina boulardi filamentous virus]
## 708               hypothetical protein LbFV_ORF35 [Leptopilina boulardi filamentous virus]
## 709               putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 710 putative lecithin:cholesterol acyltransferase [Leptopilina boulardi filamentous virus]
## 711               putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 712               hypothetical protein LbFV_ORF59 [Leptopilina boulardi filamentous virus]
## 713                              putative DNA pol [Leptopilina boulardi filamentous virus]
## 714               hypothetical protein LbFV_ORF57 [Leptopilina boulardi filamentous virus]
## 715               hypothetical protein LbFV_ORF56 [Leptopilina boulardi filamentous virus]
## 716               hypothetical protein LbFV_ORF55 [Leptopilina boulardi filamentous virus]
## 717               hypothetical protein LbFV_ORF54 [Leptopilina boulardi filamentous virus]
## 718               hypothetical protein LbFV_ORF53 [Leptopilina boulardi filamentous virus]
## 719               hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 720               hypothetical protein LbFV_ORF51 [Leptopilina boulardi filamentous virus]
## 732               hypothetical protein LbFV_ORF79 [Leptopilina boulardi filamentous virus]
## 733               hypothetical protein LbFV_ORF77 [Leptopilina boulardi filamentous virus]
## 734               hypothetical protein LbFV_ORF80 [Leptopilina boulardi filamentous virus]
## 735                               putative ATPase [Leptopilina boulardi filamentous virus]
## 736               hypothetical protein LbFV_ORF82 [Leptopilina boulardi filamentous virus]
## 737               hypothetical protein LbFV_ORF83 [Leptopilina boulardi filamentous virus]
## 738               hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 739               hypothetical protein LbFV_ORF78 [Leptopilina boulardi filamentous virus]
## 740               hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 763               hypothetical protein LbFV_ORF62 [Leptopilina boulardi filamentous virus]
## 764               hypothetical protein LbFV_ORF64 [Leptopilina boulardi filamentous virus]
## 765                            mucin-like protein [Leptopilina boulardi filamentous virus]
## 766               hypothetical protein LbFV_ORF63 [Leptopilina boulardi filamentous virus]

Drosophila-associated filamentous virus => LhFV?

# Contig identifiers grouped under the "LhFV" label for this section.
contig_set <- paste0(
  "contig_",
  c(9355, 21206, 19696, 3127, 356, 682, 22485, 223, 701, 22588, 2709, 19153)
)
# No separate list of unassigned contigs is given for this group.
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$LhFV_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "LhFV"
)
##                        orf_name        seqid    source type start   end score
## 477     contig_19153_139_1095_- contig_19153 getorf_JV gene   139  1095     .
## 489     contig_19696_103_1077_- contig_19696 getorf_JV gene   103  1077     .
## 564      contig_21206_201_422_- contig_21206 getorf_JV gene   201   422     .
## 565      contig_21206_720_917_+ contig_21206 getorf_JV gene   720   917     .
## 617    contig_223_10243_11955_+   contig_223 getorf_JV gene 10243 11955     .
## 618    contig_223_12586_14052_+   contig_223 getorf_JV gene 12586 14052     .
## 619    contig_223_14076_14873_+   contig_223 getorf_JV gene 14076 14873     .
## 620    contig_223_14890_15255_-   contig_223 getorf_JV gene 14890 15255     .
## 621    contig_223_15439_15909_-   contig_223 getorf_JV gene 15439 15909     .
## 622    contig_223_15905_16648_-   contig_223 getorf_JV gene 15905 16648     .
## 623    contig_223_16672_17559_-   contig_223 getorf_JV gene 16672 17559     .
## 624    contig_223_18129_18755_-   contig_223 getorf_JV gene 18129 18755     .
## 625    contig_223_18984_19181_+   contig_223 getorf_JV gene 18984 19181     .
## 626    contig_223_19393_21060_+   contig_223 getorf_JV gene 19393 21060     .
## 627    contig_223_21089_21451_+   contig_223 getorf_JV gene 21089 21451     .
## 628    contig_223_21908_22192_+   contig_223 getorf_JV gene 21908 22192     .
## 629    contig_223_22185_22454_+   contig_223 getorf_JV gene 22185 22454     .
## 630    contig_223_22622_22777_-   contig_223 getorf_JV gene 22622 22777     .
## 631    contig_223_22899_23939_-   contig_223 getorf_JV gene 22899 23939     .
## 632      contig_223_2316_4085_+   contig_223 getorf_JV gene  2316  4085     .
## 633    contig_223_24326_24619_-   contig_223 getorf_JV gene 24326 24619     .
## 634    contig_223_24658_25089_-   contig_223 getorf_JV gene 24658 25089     .
## 635    contig_223_25085_25825_-   contig_223 getorf_JV gene 25085 25825     .
## 636    contig_223_26022_26183_-   contig_223 getorf_JV gene 26022 26183     .
## 637    contig_223_26330_26608_-   contig_223 getorf_JV gene 26330 26608     .
## 638    contig_223_26647_28743_+   contig_223 getorf_JV gene 26647 28743     .
## 639    contig_223_28886_29104_+   contig_223 getorf_JV gene 28886 29104     .
## 640      contig_223_4284_4457_+   contig_223 getorf_JV gene  4284  4457     .
## 641      contig_223_4474_4782_+   contig_223 getorf_JV gene  4474  4782     .
## 642      contig_223_4766_5794_+   contig_223 getorf_JV gene  4766  5794     .
## 643      contig_223_5855_7516_+   contig_223 getorf_JV gene  5855  7516     .
## 644      contig_223_7581_8423_-   contig_223 getorf_JV gene  7581  8423     .
## 645      contig_223_8456_8947_-   contig_223 getorf_JV gene  8456  8947     .
## 646        contig_223_85_2166_-   contig_223 getorf_JV gene    85  2166     .
## 647      contig_223_8954_9118_-   contig_223 getorf_JV gene  8954  9118     .
## 648     contig_223_9219_10037_+   contig_223 getorf_JV gene  9219 10037     .
## 721  contig_22485_10256_10753_+ contig_22485 getorf_JV gene 10256 10753     .
## 722    contig_22485_1672_1980_+ contig_22485 getorf_JV gene  1672  1980     .
## 723        contig_22485_2_235_+ contig_22485 getorf_JV gene     2   235     .
## 724    contig_22485_2089_3477_+ contig_22485 getorf_JV gene  2089  3477     .
## 725     contig_22485_331_1332_+ contig_22485 getorf_JV gene   331  1332     .
## 726    contig_22485_3511_6681_- contig_22485 getorf_JV gene  3511  6681     .
## 727    contig_22485_6697_7524_+ contig_22485 getorf_JV gene  6697  7524     .
## 728    contig_22485_7546_8424_+ contig_22485 getorf_JV gene  7546  8424     .
## 729    contig_22485_8462_9043_+ contig_22485 getorf_JV gene  8462  9043     .
## 730    contig_22485_9131_9835_+ contig_22485 getorf_JV gene  9131  9835     .
## 731   contig_22485_9897_10253_+ contig_22485 getorf_JV gene  9897 10253     .
## 741    contig_22588_1466_2713_+ contig_22588 getorf_JV gene  1466  2713     .
## 742    contig_22588_2922_3827_+ contig_22588 getorf_JV gene  2922  3827     .
## 743      contig_22588_314_511_- contig_22588 getorf_JV gene   314   511     .
## 744    contig_22588_3876_4133_- contig_22588 getorf_JV gene  3876  4133     .
## 745    contig_22588_4337_4621_- contig_22588 getorf_JV gene  4337  4621     .
## 746    contig_22588_4626_5651_- contig_22588 getorf_JV gene  4626  5651     .
## 747      contig_22588_495_713_- contig_22588 getorf_JV gene   495   713     .
## 748    contig_22588_5708_7849_+ contig_22588 getorf_JV gene  5708  7849     .
## 749     contig_22588_713_1453_- contig_22588 getorf_JV gene   713  1453     .
## 816      contig_2709_183_2489_-  contig_2709 getorf_JV gene   183  2489     .
## 817     contig_2709_2485_3339_-  contig_2709 getorf_JV gene  2485  3339     .
## 818     contig_2709_3512_3682_+  contig_2709 getorf_JV gene  3512  3682     .
## 819     contig_2709_3724_4242_+  contig_2709 getorf_JV gene  3724  4242     .
## 854     contig_3127_1203_2399_+  contig_3127 getorf_JV gene  1203  2399     .
## 855        contig_3127_21_173_-  contig_3127 getorf_JV gene    21   173     .
## 856      contig_3127_230_1123_+  contig_3127 getorf_JV gene   230  1123     .
## 857     contig_3127_2426_2989_+  contig_3127 getorf_JV gene  2426  2989     .
## 858     contig_3127_3036_3938_+  contig_3127 getorf_JV gene  3036  3938     .
## 893    contig_356_11400_11675_+   contig_356 getorf_JV gene 11400 11675     .
## 894    contig_356_11866_12045_+   contig_356 getorf_JV gene 11866 12045     .
## 895    contig_356_12432_13463_-   contig_356 getorf_JV gene 12432 13463     .
## 896        contig_356_129_323_+   contig_356 getorf_JV gene   129   323     .
## 897    contig_356_13522_18744_-   contig_356 getorf_JV gene 13522 18744     .
## 898      contig_356_1416_2081_+   contig_356 getorf_JV gene  1416  2081     .
## 899    contig_356_18732_18896_-   contig_356 getorf_JV gene 18732 18896     .
## 900    contig_356_19208_20314_+   contig_356 getorf_JV gene 19208 20314     .
## 901    contig_356_20314_20475_-   contig_356 getorf_JV gene 20314 20475     .
## 902      contig_356_2116_6132_+   contig_356 getorf_JV gene  2116  6132     .
## 903       contig_356_375_1388_-   contig_356 getorf_JV gene   375  1388     .
## 904      contig_356_6373_8277_+   contig_356 getorf_JV gene  6373  8277     .
## 905      contig_356_8294_9499_+   contig_356 getorf_JV gene  8294  9499     .
## 906     contig_356_9543_11363_-   contig_356 getorf_JV gene  9543 11363     .
## 1054   contig_682_10487_11527_+   contig_682 getorf_JV gene 10487 11527     .
## 1055   contig_682_11514_12587_-   contig_682 getorf_JV gene 11514 12587     .
## 1056       contig_682_143_523_-   contig_682 getorf_JV gene   143   523     .
## 1057     contig_682_1632_3863_+   contig_682 getorf_JV gene  1632  3863     .
## 1058     contig_682_4002_4976_-   contig_682 getorf_JV gene  4002  4976     .
## 1059     contig_682_5074_6042_-   contig_682 getorf_JV gene  5074  6042     .
## 1060      contig_682_553_1530_+   contig_682 getorf_JV gene   553  1530     .
## 1061     contig_682_6051_6554_-   contig_682 getorf_JV gene  6051  6554     .
## 1062     contig_682_6594_6875_-   contig_682 getorf_JV gene  6594  6875     .
## 1063     contig_682_6900_7913_-   contig_682 getorf_JV gene  6900  7913     .
## 1064     contig_682_8085_8537_+   contig_682 getorf_JV gene  8085  8537     .
## 1065     contig_682_8614_9309_-   contig_682 getorf_JV gene  8614  9309     .
## 1066     contig_682_9399_9851_+   contig_682 getorf_JV gene  9399  9851     .
## 1067    contig_682_9856_10461_+   contig_682 getorf_JV gene  9856 10461     .
## 1068   contig_701_10875_11123_-   contig_701 getorf_JV gene 10875 11123     .
## 1069   contig_701_11309_12256_+   contig_701 getorf_JV gene 11309 12256     .
## 1070       contig_701_577_804_-   contig_701 getorf_JV gene   577   804     .
## 1071     contig_701_6141_6950_-   contig_701 getorf_JV gene  6141  6950     .
## 1072     contig_701_7408_7818_-   contig_701 getorf_JV gene  7408  7818     .
## 1073     contig_701_7954_9372_-   contig_701 getorf_JV gene  7954  9372     .
## 1074      contig_701_800_6073_-   contig_701 getorf_JV gene   800  6073     .
## 1075     contig_701_9410_9571_-   contig_701 getorf_JV gene  9410  9571     .
## 1076    contig_701_9683_10882_-   contig_701 getorf_JV gene  9683 10882     .
## 1253    contig_9355_1174_1404_-  contig_9355 getorf_JV gene  1174  1404     .
## 1254    contig_9355_1524_1712_-  contig_9355 getorf_JV gene  1524  1712     .
## 1255      contig_9355_190_942_-  contig_9355 getorf_JV gene   190   942     .
##      strand phase attributes seq_length     subject_id identity
## 477   FALSE     1        957       1096     QKN22475.1    0.965
## 489   FALSE     1        975       1079     QKN22518.1    0.993
## 564   FALSE     1        222       1032           <NA>       NA
## 565    TRUE     1        198       1032           <NA>       NA
## 617    TRUE     1       1713      29250     QKN22474.1    0.942
## 618    TRUE     1       1467      29250     QKN22475.1    0.946
## 619    TRUE     1        798      29250     QKN22476.1    0.943
## 620   FALSE     1        366      29250     QKN22472.1    0.975
## 621   FALSE     1        471      29250     QKN22471.1    0.979
## 622   FALSE     1        744      29250     QKN22470.1    0.951
## 623   FALSE     1        888      29250     QKN22469.1    0.972
## 624   FALSE     1        627      29250     QKN22467.1    0.980
## 625    TRUE     1        198      29250           <NA>       NA
## 626    TRUE     1       1668      29250     QKN22477.1    0.982
## 627    TRUE     1        363      29250           <NA>       NA
## 628    TRUE     1        285      29250 YP_009345614.1    0.425
## 629    TRUE     1        270      29250           <NA>       NA
## 630   FALSE     1        156      29250           <NA>       NA
## 631   FALSE     1       1041      29250     QKN22465.1    0.974
## 632    TRUE     1       1770      29250     QKN22521.1    0.993
## 633   FALSE     1        294      29250           <NA>       NA
## 634   FALSE     1        432      29250           <NA>       NA
## 635   FALSE     1        741      29250           <NA>       NA
## 636   FALSE     1        162      29250           <NA>       NA
## 637   FALSE     1        279      29250           <NA>       NA
## 638    TRUE     1       2097      29250 YP_009345672.1    0.349
## 639    TRUE     1        219      29250     QKN22507.1    0.731
## 640    TRUE     1        174      29250           <NA>       NA
## 641    TRUE     1        309      29250           <NA>       NA
## 642    TRUE     1       1029      29250           <NA>       NA
## 643    TRUE     1       1662      29250 YP_009345685.1    0.346
## 644   FALSE     1        843      29250 YP_009345671.1    0.333
## 645   FALSE     1        492      29250           <NA>       NA
## 646   FALSE     1       2082      29250     QKN22522.1    0.985
## 647   FALSE     1        165      29250           <NA>       NA
## 648    TRUE     1        819      29250     QKN22473.1    0.995
## 721    TRUE     1        498      10753     QKN22515.1    1.000
## 722    TRUE     1        309      10753     QKN22501.1    1.000
## 723    TRUE     1        234      10753     QKN22499.1    0.853
## 724    TRUE     1       1389      10753     QKN22502.1    0.989
## 725    TRUE     1       1002      10753     QKN22500.1    0.991
## 726   FALSE     1       3171      10753     QKN22516.1    0.982
## 727    TRUE     1        828      10753     QKN22510.1    0.960
## 728    TRUE     1        879      10753     QKN22511.1    0.993
## 729    TRUE     1        582      10753     QKN22512.1    0.989
## 730    TRUE     1        705      10753     QKN22513.1    0.987
## 731    TRUE     1        357      10753     QKN22514.1    0.957
## 741    TRUE     1       1248       7997     QKN22504.1    0.987
## 742    TRUE     1        906       7997     QKN22505.1    1.000
## 743   FALSE     1        198       7997           <NA>       NA
## 744   FALSE     1        258       7997           <NA>       NA
## 745   FALSE     1        285       7997           <NA>       NA
## 746   FALSE     1       1026       7997     QKN22508.1    0.979
## 747   FALSE     1        219       7997           <NA>       NA
## 748    TRUE     1       2142       7997     QKN22506.1    0.976
## 749   FALSE     1        741       7997     QKN22509.1    0.995
## 816   FALSE     1       2307       4458     QKN22456.1    0.990
## 817   FALSE     1        855       4458     QKN22455.1    0.989
## 818    TRUE     1        171       4458           <NA>       NA
## 819    TRUE     1        519       4458 XP_002083176.1    0.396
## 854    TRUE     1       1197       3938     QKN22497.1    0.992
## 855   FALSE     1        153       3938           <NA>       NA
## 856    TRUE     1        894       3938     QKN22496.1    0.993
## 857    TRUE     1        564       3938     QKN22498.1    0.984
## 858    TRUE     1        903       3938     QKN22499.1    0.858
## 893    TRUE     1        276      20719           <NA>       NA
## 894    TRUE     1        180      20719           <NA>       NA
## 895   FALSE     1       1032      20719     QKN22479.1    0.916
## 896    TRUE     1        195      20719           <NA>       NA
## 897   FALSE     1       5223      20719     QKN22478.1    0.977
## 898    TRUE     1        666      20719     QKN22517.1    0.977
## 899   FALSE     1        165      20719           <NA>       NA
## 900    TRUE     1       1107      20719     QKN22483.1    0.981
## 901   FALSE     1        162      20719           <NA>       NA
## 902    TRUE     1       4017      20719     QKN22518.1    0.878
## 903   FALSE     1       1014      20719     QKN22519.1    0.991
## 904    TRUE     1       1905      20719           <NA>       NA
## 905    TRUE     1       1206      20719 YP_009345656.1    0.289
## 906   FALSE     1       1821      20719     QKN22481.1    0.978
## 1054   TRUE     1       1041      12802 YP_009345706.1    0.296
## 1055  FALSE     1       1074      12802           <NA>       NA
## 1056  FALSE     1        381      12802           <NA>       NA
## 1057   TRUE     1       2232      12802     QKN22485.1    0.979
## 1058  FALSE     1        975      12802     QKN22492.1    0.933
## 1059  FALSE     1        969      12802     QKN22491.1    0.987
## 1060   TRUE     1        978      12802     QKN22484.1    0.957
## 1061  FALSE     1        504      12802     QKN22490.1    1.000
## 1062  FALSE     1        282      12802           <NA>       NA
## 1063  FALSE     1       1014      12802     QKN22489.1    0.970
## 1064   TRUE     1        453      12802     QKN22487.1    0.993
## 1065  FALSE     1        696      12802     QKN22488.1    0.990
## 1066   TRUE     1        453      12802           <NA>       NA
## 1067   TRUE     1        606      12802           <NA>       NA
## 1068  FALSE     1        249      12482           <NA>       NA
## 1069   TRUE     1        948      12482     QKN22463.1    0.958
## 1070  FALSE     1        228      12482           <NA>       NA
## 1071  FALSE     1        810      12482     QKN22460.1    0.977
## 1072  FALSE     1        411      12482     QKN22459.1    0.978
## 1073  FALSE     1       1419      12482     QKN22458.1    0.987
## 1074  FALSE     1       5274      12482     QKN22461.1    0.977
## 1075  FALSE     1        162      12482           <NA>       NA
## 1076  FALSE     1       1200      12482     QKN22457.1    0.990
## 1253  FALSE     1        231       1744     QKN22523.1    0.680
## 1254  FALSE     1        189       1744     QKN22523.1    0.677
## 1255  FALSE     1        753       1744     QKN22523.1    0.988
##      alignment_length mismatches gap_opens qstart qend sstart send
## 477               319         11         0      1  319    161  473
## 489               295          2         0      1  295    959 1252
## 564                NA         NA        NA     NA   NA     NA   NA
## 565                NA         NA        NA     NA   NA     NA   NA
## 617               571         33         0      1  571      1  570
## 618               489         25         0      1  489      1  473
## 619               266         15         0      1  266      1  259
## 620               122          3         0      1  122      1  122
## 621               146          3         0      8  153      2  147
## 622               248         12         0      1  248      1  238
## 623               296          8         0      1  296      1  291
## 624               209          4         0      1  209      1  207
## 625                NA         NA        NA     NA   NA     NA   NA
## 626               556         10         0      1  556      1  556
## 627                NA         NA        NA     NA   NA     NA   NA
## 628                47         26         0     41   87     38   84
## 629                NA         NA        NA     NA   NA     NA   NA
## 630                NA         NA        NA     NA   NA     NA   NA
## 631               347          9         0      1  347      1  346
## 632               589          4         0      1  589      1  588
## 633                NA         NA        NA     NA   NA     NA   NA
## 634                NA         NA        NA     NA   NA     NA   NA
## 635                NA         NA        NA     NA   NA     NA   NA
## 636                NA         NA        NA     NA   NA     NA   NA
## 637                NA         NA        NA     NA   NA     NA   NA
## 638               615        384         0     83  697     52  642
## 639                67         16         0      7   68     35  101
## 640                NA         NA        NA     NA   NA     NA   NA
## 641                NA         NA        NA     NA   NA     NA   NA
## 642                NA         NA        NA     NA   NA     NA   NA
## 643               457        297         0     92  548     78  532
## 644               287        185         0      3  281     13  299
## 645                NA         NA        NA     NA   NA     NA   NA
## 646               694         10         0      1  694      1  693
## 647                NA         NA        NA     NA   NA     NA   NA
## 648               244          1         0     30  273      1  244
## 721               166          0         0      1  166      1  166
## 722               102          0         0      1  102      1  102
## 723                73         10         0      6   78    314  385
## 724               463          5         0      1  463      1  462
## 725               334          3         0      1  334      1  334
## 726               704         13         0      1  704     30  728
## 727               276         11         0      1  276      1  269
## 728               292          2         0      1  292      1  292
## 729               194          2         0      1  194      1  194
## 730               235          3         0      1  235      1  233
## 731               119          5         0      1  119      1  116
## 741               416          5         0      1  416      1  412
## 742               302          0         0      1  302      1  302
## 743                NA         NA        NA     NA   NA     NA   NA
## 744                NA         NA        NA     NA   NA     NA   NA
## 745                NA         NA        NA     NA   NA     NA   NA
## 746               346          7         0      1  342      1  346
## 747                NA         NA        NA     NA   NA     NA   NA
## 748               714         17         0      1  714     19  727
## 749               247          1         0      1  247      1  247
## 816               769          8         0      1  769      1  769
## 817               285          3         0      1  285      1  285
## 818                NA         NA        NA     NA   NA     NA   NA
## 819               101         60         0     35  134     30  130
## 854               399          3         0      1  399      1  399
## 855                NA         NA        NA     NA   NA     NA   NA
## 856               298          2         0      1  298      1  298
## 857               188          3         0      1  188      1  188
## 858               333         43         0      1  301      1  333
## 893                NA         NA        NA     NA   NA     NA   NA
## 894                NA         NA        NA     NA   NA     NA   NA
## 895               349         29         0      1  344      1  349
## 896                NA         NA        NA     NA   NA     NA   NA
## 897              1747         40         0      1 1741      1 1747
## 898               222          5         0      1  222      1  218
## 899                NA         NA        NA     NA   NA     NA   NA
## 900               369          7         0      1  369      1  369
## 901                NA         NA        NA     NA   NA     NA   NA
## 902              1271        153         0     39 1309      1 1252
## 903               338          3         0      1  338      1  338
## 904                NA         NA        NA     NA   NA     NA   NA
## 905               389        247         0      9  397     11  358
## 906               617         13         0      1  607      1  617
## 1054              347        235         0      3  337      5  351
## 1055               NA         NA        NA     NA   NA     NA   NA
## 1056               NA         NA        NA     NA   NA     NA   NA
## 1057              744         15         0      1  744      1  737
## 1058              316         21         0      1  314      1  316
## 1059              323          4         0      1  323      1  320
## 1060              326         14         0      1  326      1  326
## 1061              168          0         0      1  168      1  168
## 1062               NA         NA        NA     NA   NA     NA   NA
## 1063              338         10         0      1  338      1  335
## 1064              151          1         0      1  151      1  151
## 1065              104          1         0    129  232      1  104
## 1066               NA         NA        NA     NA   NA     NA   NA
## 1067               NA         NA        NA     NA   NA     NA   NA
## 1068               NA         NA        NA     NA   NA     NA   NA
## 1069              316         13         0      1  316      1  316
## 1070               NA         NA        NA     NA   NA     NA   NA
## 1071              270          6         0      1  270      1  270
## 1072              137          3         0      1  137      1  137
## 1073              473          6         0      1  473      1  473
## 1074             1762         40         0      1 1758      1 1762
## 1075               NA         NA        NA     NA   NA     NA   NA
## 1076              400          4         0      1  400      1  398
## 1253               73         22         0      1   69     54  126
## 1254               57         17         0      4   58    103  159
## 1255              251          3         0      1  251      1  251
##             evalue bitscore
## 477  1.310000e-201      624
## 489  9.019000e-192      596
## 564             NA       NA
## 565             NA       NA
## 617  2.574082e-321      984
## 618  2.527000e-295      904
## 619  1.383000e-152      480
## 620   4.059000e-75      248
## 621   9.200000e-93      301
## 622  4.351000e-153      480
## 623  1.220000e-182      568
## 624  7.725000e-122      388
## 625             NA       NA
## 626   0.000000e+00     1126
## 627             NA       NA
## 628   9.676000e-06       47
## 629             NA       NA
## 630             NA       NA
## 631  2.014000e-218      674
## 632   0.000000e+00     1204
## 633             NA       NA
## 634             NA       NA
## 635             NA       NA
## 636             NA       NA
## 637             NA       NA
## 638  1.353000e-100      349
## 639   1.973000e-22       94
## 640             NA       NA
## 641             NA       NA
## 642             NA       NA
## 643   6.465000e-67      244
## 644   8.672000e-39      151
## 645             NA       NA
## 646   0.000000e+00     1401
## 647             NA       NA
## 648  2.505000e-145      459
## 721  6.913000e-109      348
## 722   4.516000e-62      209
## 723   1.145000e-24      100
## 724  1.210000e-303      927
## 725  1.297000e-223      689
## 726   0.000000e+00     1390
## 727  3.561000e-157      494
## 728  1.155000e-196      608
## 729  1.959000e-125      397
## 730  9.427000e-151      473
## 731   6.826000e-58      198
## 741  9.498000e-269      824
## 742  1.090000e-191      595
## 743             NA       NA
## 744             NA       NA
## 745             NA       NA
## 746  1.448000e-217      672
## 747             NA       NA
## 748   0.000000e+00     1448
## 749  1.724000e-157      493
## 816   0.000000e+00     1535
## 817  2.571000e-182      567
## 818             NA       NA
## 819   1.455000e-21       96
## 854  4.731000e-270      826
## 855             NA       NA
## 856  2.112000e-190      591
## 857  5.552000e-119      378
## 858  2.016000e-175      548
## 893             NA       NA
## 894             NA       NA
## 895  3.993000e-207      642
## 896             NA       NA
## 897   0.000000e+00     3397
## 898  1.080000e-138      437
## 899             NA       NA
## 900  7.696000e-244      749
## 901             NA       NA
## 902   0.000000e+00     2224
## 903  1.898000e-225      694
## 904             NA       NA
## 905   7.797000e-40      159
## 906   0.000000e+00     1211
## 1054  5.965000e-32      134
## 1055            NA       NA
## 1056            NA       NA
## 1057  0.000000e+00     1435
## 1058 6.033000e-162      510
## 1059 7.103000e-197      611
## 1060 5.392000e-209      646
## 1061 1.043000e-108      347
## 1062            NA       NA
## 1063 1.570000e-210      651
## 1064  2.352000e-95      308
## 1065  2.531000e-58      206
## 1066            NA       NA
## 1067            NA       NA
## 1068            NA       NA
## 1069 3.813000e-207      640
## 1070            NA       NA
## 1071 4.182000e-178      554
## 1072  3.896000e-77      255
## 1073 1.345000e-309      945
## 1074  0.000000e+00     3413
## 1075            NA       NA
## 1076 9.075000e-260      797
## 1253  4.144000e-17       79
## 1254  8.819000e-15       71
## 1255 2.666000e-153      481
##                                                                                    annotation
## 477                             putative protein 21 [Drosophila-associated filamentous virus]
## 489                      putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 564                                                                                      <NA>
## 565                                                                                      <NA>
## 617                             putative protein 20 [Drosophila-associated filamentous virus]
## 618                             putative protein 21 [Drosophila-associated filamentous virus]
## 619                             putative protein 22 [Drosophila-associated filamentous virus]
## 620                             putative protein 18 [Drosophila-associated filamentous virus]
## 621                             putative protein 17 [Drosophila-associated filamentous virus]
## 622                                  putative ORF19 [Drosophila-associated filamentous virus]
## 623                             putative protein 15 [Drosophila-associated filamentous virus]
## 624                             putative protein 13 [Drosophila-associated filamentous virus]
## 625                                                                                      <NA>
## 626                    putative JmJC domain protein [Drosophila-associated filamentous virus]
## 627                                                                                      <NA>
## 628                  hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 629                                                                                      <NA>
## 630                                                                                      <NA>
## 631                             putative protein 11 [Drosophila-associated filamentous virus]
## 632                                 putative ORF107 [Drosophila-associated filamentous virus]
## 633                                                                                      <NA>
## 634                                                                                      <NA>
## 635                                                                                      <NA>
## 636                                                                                      <NA>
## 637                                                                                      <NA>
## 638                  hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 639                             putative protein 53 [Drosophila-associated filamentous virus]
## 640                                                                                      <NA>
## 641                                                                                      <NA>
## 642                                                                                      <NA>
## 643                                  putative ATPase [Leptopilina boulardi filamentous virus]
## 644                  hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 645                                                                                      <NA>
## 646                             putative protein 65 [Drosophila-associated filamentous virus]
## 647                                                                                      <NA>
## 648                             putative protein 19 [Drosophila-associated filamentous virus]
## 721                             putative protein 61 [Drosophila-associated filamentous virus]
## 722                             putative protein 47 [Drosophila-associated filamentous virus]
## 723                             putative protein 45 [Drosophila-associated filamentous virus]
## 724                             putative protein 48 [Drosophila-associated filamentous virus]
## 725                                  putative ORF43 [Drosophila-associated filamentous virus]
## 726                         putative ORF96, partial [Drosophila-associated filamentous virus]
## 727                             putative protein 56 [Drosophila-associated filamentous virus]
## 728                             putative protein 57 [Drosophila-associated filamentous virus]
## 729                             putative protein 58 [Drosophila-associated filamentous virus]
## 730                      putative Ac81-like protein [Drosophila-associated filamentous virus]
## 731                             putative protein 60 [Drosophila-associated filamentous virus]
## 741  putative lecithine cholesterol acyltransferase [Drosophila-associated filamentous virus]
## 742                             putative protein 51 [Drosophila-associated filamentous virus]
## 743                                                                                      <NA>
## 744                                                                                      <NA>
## 745                                                                                      <NA>
## 746                             putative protein 54 [Drosophila-associated filamentous virus]
## 747                                                                                      <NA>
## 748                               PIF1-like protein [Drosophila-associated filamentous virus]
## 749                             putative protein 55 [Drosophila-associated filamentous virus]
## 816                                P74-like protein [Drosophila-associated filamentous virus]
## 817                              putative protein 1 [Drosophila-associated filamentous virus]
## 818                                                                                      <NA>
## 819                                                          lysozyme X [Drosophila simulans]
## 854                                  putative ORF20 [Drosophila-associated filamentous virus]
## 855                                                                                      <NA>
## 856                             putative protein 42 [Drosophila-associated filamentous virus]
## 857                             putative protein 44 [Drosophila-associated filamentous virus]
## 858                             putative protein 45 [Drosophila-associated filamentous virus]
## 893                                                                                      <NA>
## 894                                                                                      <NA>
## 895                             putative protein 25 [Drosophila-associated filamentous virus]
## 896                                                                                      <NA>
## 897                               PIF2-like protein [Drosophila-associated filamentous virus]
## 898                             putative protein 67 [Drosophila-associated filamentous virus]
## 899                                                                                      <NA>
## 900                             putative protein 29 [Drosophila-associated filamentous virus]
## 901                                                                                      <NA>
## 902                      putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 903                             putative protein 69 [Drosophila-associated filamentous virus]
## 904                                                                                      <NA>
## 905                  hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 906                             putative protein 27 [Drosophila-associated filamentous virus]
## 1054                hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 1055                                                                                     <NA>
## 1056                                                                                     <NA>
## 1057                                  putative ORF5 [Drosophila-associated filamentous virus]
## 1058                            putative protein 38 [Drosophila-associated filamentous virus]
## 1059                            putative protein 37 [Drosophila-associated filamentous virus]
## 1060                            putative protein 30 [Drosophila-associated filamentous virus]
## 1061                            putative protein 36 [Drosophila-associated filamentous virus]
## 1062                                                                                     <NA>
## 1063                                 putative ORF24 [Drosophila-associated filamentous virus]
## 1064                            putative protein 33 [Drosophila-associated filamentous virus]
## 1065                            putative protein 34 [Drosophila-associated filamentous virus]
## 1066                                                                                     <NA>
## 1067                                                                                     <NA>
## 1068                                                                                     <NA>
## 1069                                  putative ORF1 [Drosophila-associated filamentous virus]
## 1070                                                                                     <NA>
## 1071                  putative nudix domain protein [Drosophila-associated filamentous virus]
## 1072                             putative protein 5 [Drosophila-associated filamentous virus]
## 1073                             putative protein 4 [Drosophila-associated filamentous virus]
## 1074                             putative protein 7 [Drosophila-associated filamentous virus]
## 1075                                                                                     <NA>
## 1076                                  putative ORF2 [Drosophila-associated filamentous virus]
## 1253                           ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1254                           ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1255                           ODV-E66-like protein [Drosophila-associated filamentous virus]
## Saving 7 x 5 in image
# Show the first slot of `res`. Single-bracket indexing keeps the list
# wrapper, which is why the echoed output begins with a `[[1]]` header.
# NOTE(review): no text follows that header, so this element is presumably
# the plot behind the "Saving 7 x 5 in image" message above; confirm.
res[1]
## [[1]]

# Show the second slot of `res`, again as a one-element list (`[[1]]` header).
# The element printed below is a data frame with one row per ORF: orf_name,
# seqid, source, type, start, end, score, strand, phase, attributes and
# seq_length, followed by alignment columns (subject_id, identity,
# alignment_length, mismatches, gap_opens, qstart, qend, sstart, send).
# Rows whose subject_id is <NA> are NA across all alignment columns.
# NOTE(review): the alignment columns look like BLAST-style tabular hits;
# confirm against the code that builds `res`.
res[2]
## [[1]]
##                        orf_name        seqid    source type start   end score
## 477     contig_19153_139_1095_- contig_19153 getorf_JV gene   139  1095     .
## 489     contig_19696_103_1077_- contig_19696 getorf_JV gene   103  1077     .
## 564      contig_21206_201_422_- contig_21206 getorf_JV gene   201   422     .
## 565      contig_21206_720_917_+ contig_21206 getorf_JV gene   720   917     .
## 617    contig_223_10243_11955_+   contig_223 getorf_JV gene 10243 11955     .
## 618    contig_223_12586_14052_+   contig_223 getorf_JV gene 12586 14052     .
## 619    contig_223_14076_14873_+   contig_223 getorf_JV gene 14076 14873     .
## 620    contig_223_14890_15255_-   contig_223 getorf_JV gene 14890 15255     .
## 621    contig_223_15439_15909_-   contig_223 getorf_JV gene 15439 15909     .
## 622    contig_223_15905_16648_-   contig_223 getorf_JV gene 15905 16648     .
## 623    contig_223_16672_17559_-   contig_223 getorf_JV gene 16672 17559     .
## 624    contig_223_18129_18755_-   contig_223 getorf_JV gene 18129 18755     .
## 625    contig_223_18984_19181_+   contig_223 getorf_JV gene 18984 19181     .
## 626    contig_223_19393_21060_+   contig_223 getorf_JV gene 19393 21060     .
## 627    contig_223_21089_21451_+   contig_223 getorf_JV gene 21089 21451     .
## 628    contig_223_21908_22192_+   contig_223 getorf_JV gene 21908 22192     .
## 629    contig_223_22185_22454_+   contig_223 getorf_JV gene 22185 22454     .
## 630    contig_223_22622_22777_-   contig_223 getorf_JV gene 22622 22777     .
## 631    contig_223_22899_23939_-   contig_223 getorf_JV gene 22899 23939     .
## 632      contig_223_2316_4085_+   contig_223 getorf_JV gene  2316  4085     .
## 633    contig_223_24326_24619_-   contig_223 getorf_JV gene 24326 24619     .
## 634    contig_223_24658_25089_-   contig_223 getorf_JV gene 24658 25089     .
## 635    contig_223_25085_25825_-   contig_223 getorf_JV gene 25085 25825     .
## 636    contig_223_26022_26183_-   contig_223 getorf_JV gene 26022 26183     .
## 637    contig_223_26330_26608_-   contig_223 getorf_JV gene 26330 26608     .
## 638    contig_223_26647_28743_+   contig_223 getorf_JV gene 26647 28743     .
## 639    contig_223_28886_29104_+   contig_223 getorf_JV gene 28886 29104     .
## 640      contig_223_4284_4457_+   contig_223 getorf_JV gene  4284  4457     .
## 641      contig_223_4474_4782_+   contig_223 getorf_JV gene  4474  4782     .
## 642      contig_223_4766_5794_+   contig_223 getorf_JV gene  4766  5794     .
## 643      contig_223_5855_7516_+   contig_223 getorf_JV gene  5855  7516     .
## 644      contig_223_7581_8423_-   contig_223 getorf_JV gene  7581  8423     .
## 645      contig_223_8456_8947_-   contig_223 getorf_JV gene  8456  8947     .
## 646        contig_223_85_2166_-   contig_223 getorf_JV gene    85  2166     .
## 647      contig_223_8954_9118_-   contig_223 getorf_JV gene  8954  9118     .
## 648     contig_223_9219_10037_+   contig_223 getorf_JV gene  9219 10037     .
## 721  contig_22485_10256_10753_+ contig_22485 getorf_JV gene 10256 10753     .
## 722    contig_22485_1672_1980_+ contig_22485 getorf_JV gene  1672  1980     .
## 723        contig_22485_2_235_+ contig_22485 getorf_JV gene     2   235     .
## 724    contig_22485_2089_3477_+ contig_22485 getorf_JV gene  2089  3477     .
## 725     contig_22485_331_1332_+ contig_22485 getorf_JV gene   331  1332     .
## 726    contig_22485_3511_6681_- contig_22485 getorf_JV gene  3511  6681     .
## 727    contig_22485_6697_7524_+ contig_22485 getorf_JV gene  6697  7524     .
## 728    contig_22485_7546_8424_+ contig_22485 getorf_JV gene  7546  8424     .
## 729    contig_22485_8462_9043_+ contig_22485 getorf_JV gene  8462  9043     .
## 730    contig_22485_9131_9835_+ contig_22485 getorf_JV gene  9131  9835     .
## 731   contig_22485_9897_10253_+ contig_22485 getorf_JV gene  9897 10253     .
## 741    contig_22588_1466_2713_+ contig_22588 getorf_JV gene  1466  2713     .
## 742    contig_22588_2922_3827_+ contig_22588 getorf_JV gene  2922  3827     .
## 743      contig_22588_314_511_- contig_22588 getorf_JV gene   314   511     .
## 744    contig_22588_3876_4133_- contig_22588 getorf_JV gene  3876  4133     .
## 745    contig_22588_4337_4621_- contig_22588 getorf_JV gene  4337  4621     .
## 746    contig_22588_4626_5651_- contig_22588 getorf_JV gene  4626  5651     .
## 747      contig_22588_495_713_- contig_22588 getorf_JV gene   495   713     .
## 748    contig_22588_5708_7849_+ contig_22588 getorf_JV gene  5708  7849     .
## 749     contig_22588_713_1453_- contig_22588 getorf_JV gene   713  1453     .
## 816      contig_2709_183_2489_-  contig_2709 getorf_JV gene   183  2489     .
## 817     contig_2709_2485_3339_-  contig_2709 getorf_JV gene  2485  3339     .
## 818     contig_2709_3512_3682_+  contig_2709 getorf_JV gene  3512  3682     .
## 819     contig_2709_3724_4242_+  contig_2709 getorf_JV gene  3724  4242     .
## 854     contig_3127_1203_2399_+  contig_3127 getorf_JV gene  1203  2399     .
## 855        contig_3127_21_173_-  contig_3127 getorf_JV gene    21   173     .
## 856      contig_3127_230_1123_+  contig_3127 getorf_JV gene   230  1123     .
## 857     contig_3127_2426_2989_+  contig_3127 getorf_JV gene  2426  2989     .
## 858     contig_3127_3036_3938_+  contig_3127 getorf_JV gene  3036  3938     .
## 893    contig_356_11400_11675_+   contig_356 getorf_JV gene 11400 11675     .
## 894    contig_356_11866_12045_+   contig_356 getorf_JV gene 11866 12045     .
## 895    contig_356_12432_13463_-   contig_356 getorf_JV gene 12432 13463     .
## 896        contig_356_129_323_+   contig_356 getorf_JV gene   129   323     .
## 897    contig_356_13522_18744_-   contig_356 getorf_JV gene 13522 18744     .
## 898      contig_356_1416_2081_+   contig_356 getorf_JV gene  1416  2081     .
## 899    contig_356_18732_18896_-   contig_356 getorf_JV gene 18732 18896     .
## 900    contig_356_19208_20314_+   contig_356 getorf_JV gene 19208 20314     .
## 901    contig_356_20314_20475_-   contig_356 getorf_JV gene 20314 20475     .
## 902      contig_356_2116_6132_+   contig_356 getorf_JV gene  2116  6132     .
## 903       contig_356_375_1388_-   contig_356 getorf_JV gene   375  1388     .
## 904      contig_356_6373_8277_+   contig_356 getorf_JV gene  6373  8277     .
## 905      contig_356_8294_9499_+   contig_356 getorf_JV gene  8294  9499     .
## 906     contig_356_9543_11363_-   contig_356 getorf_JV gene  9543 11363     .
## 1054   contig_682_10487_11527_+   contig_682 getorf_JV gene 10487 11527     .
## 1055   contig_682_11514_12587_-   contig_682 getorf_JV gene 11514 12587     .
## 1056       contig_682_143_523_-   contig_682 getorf_JV gene   143   523     .
## 1057     contig_682_1632_3863_+   contig_682 getorf_JV gene  1632  3863     .
## 1058     contig_682_4002_4976_-   contig_682 getorf_JV gene  4002  4976     .
## 1059     contig_682_5074_6042_-   contig_682 getorf_JV gene  5074  6042     .
## 1060      contig_682_553_1530_+   contig_682 getorf_JV gene   553  1530     .
## 1061     contig_682_6051_6554_-   contig_682 getorf_JV gene  6051  6554     .
## 1062     contig_682_6594_6875_-   contig_682 getorf_JV gene  6594  6875     .
## 1063     contig_682_6900_7913_-   contig_682 getorf_JV gene  6900  7913     .
## 1064     contig_682_8085_8537_+   contig_682 getorf_JV gene  8085  8537     .
## 1065     contig_682_8614_9309_-   contig_682 getorf_JV gene  8614  9309     .
## 1066     contig_682_9399_9851_+   contig_682 getorf_JV gene  9399  9851     .
## 1067    contig_682_9856_10461_+   contig_682 getorf_JV gene  9856 10461     .
## 1068   contig_701_10875_11123_-   contig_701 getorf_JV gene 10875 11123     .
## 1069   contig_701_11309_12256_+   contig_701 getorf_JV gene 11309 12256     .
## 1070       contig_701_577_804_-   contig_701 getorf_JV gene   577   804     .
## 1071     contig_701_6141_6950_-   contig_701 getorf_JV gene  6141  6950     .
## 1072     contig_701_7408_7818_-   contig_701 getorf_JV gene  7408  7818     .
## 1073     contig_701_7954_9372_-   contig_701 getorf_JV gene  7954  9372     .
## 1074      contig_701_800_6073_-   contig_701 getorf_JV gene   800  6073     .
## 1075     contig_701_9410_9571_-   contig_701 getorf_JV gene  9410  9571     .
## 1076    contig_701_9683_10882_-   contig_701 getorf_JV gene  9683 10882     .
## 1253    contig_9355_1174_1404_-  contig_9355 getorf_JV gene  1174  1404     .
## 1254    contig_9355_1524_1712_-  contig_9355 getorf_JV gene  1524  1712     .
## 1255      contig_9355_190_942_-  contig_9355 getorf_JV gene   190   942     .
##      strand phase attributes seq_length     subject_id identity
## 477   FALSE     1        957       1096     QKN22475.1    0.965
## 489   FALSE     1        975       1079     QKN22518.1    0.993
## 564   FALSE     1        222       1032           <NA>       NA
## 565    TRUE     1        198       1032           <NA>       NA
## 617    TRUE     1       1713      29250     QKN22474.1    0.942
## 618    TRUE     1       1467      29250     QKN22475.1    0.946
## 619    TRUE     1        798      29250     QKN22476.1    0.943
## 620   FALSE     1        366      29250     QKN22472.1    0.975
## 621   FALSE     1        471      29250     QKN22471.1    0.979
## 622   FALSE     1        744      29250     QKN22470.1    0.951
## 623   FALSE     1        888      29250     QKN22469.1    0.972
## 624   FALSE     1        627      29250     QKN22467.1    0.980
## 625    TRUE     1        198      29250           <NA>       NA
## 626    TRUE     1       1668      29250     QKN22477.1    0.982
## 627    TRUE     1        363      29250           <NA>       NA
## 628    TRUE     1        285      29250 YP_009345614.1    0.425
## 629    TRUE     1        270      29250           <NA>       NA
## 630   FALSE     1        156      29250           <NA>       NA
## 631   FALSE     1       1041      29250     QKN22465.1    0.974
## 632    TRUE     1       1770      29250     QKN22521.1    0.993
## 633   FALSE     1        294      29250           <NA>       NA
## 634   FALSE     1        432      29250           <NA>       NA
## 635   FALSE     1        741      29250           <NA>       NA
## 636   FALSE     1        162      29250           <NA>       NA
## 637   FALSE     1        279      29250           <NA>       NA
## 638    TRUE     1       2097      29250 YP_009345672.1    0.349
## 639    TRUE     1        219      29250     QKN22507.1    0.731
## 640    TRUE     1        174      29250           <NA>       NA
## 641    TRUE     1        309      29250           <NA>       NA
## 642    TRUE     1       1029      29250           <NA>       NA
## 643    TRUE     1       1662      29250 YP_009345685.1    0.346
## 644   FALSE     1        843      29250 YP_009345671.1    0.333
## 645   FALSE     1        492      29250           <NA>       NA
## 646   FALSE     1       2082      29250     QKN22522.1    0.985
## 647   FALSE     1        165      29250           <NA>       NA
## 648    TRUE     1        819      29250     QKN22473.1    0.995
## 721    TRUE     1        498      10753     QKN22515.1    1.000
## 722    TRUE     1        309      10753     QKN22501.1    1.000
## 723    TRUE     1        234      10753     QKN22499.1    0.853
## 724    TRUE     1       1389      10753     QKN22502.1    0.989
## 725    TRUE     1       1002      10753     QKN22500.1    0.991
## 726   FALSE     1       3171      10753     QKN22516.1    0.982
## 727    TRUE     1        828      10753     QKN22510.1    0.960
## 728    TRUE     1        879      10753     QKN22511.1    0.993
## 729    TRUE     1        582      10753     QKN22512.1    0.989
## 730    TRUE     1        705      10753     QKN22513.1    0.987
## 731    TRUE     1        357      10753     QKN22514.1    0.957
## 741    TRUE     1       1248       7997     QKN22504.1    0.987
## 742    TRUE     1        906       7997     QKN22505.1    1.000
## 743   FALSE     1        198       7997           <NA>       NA
## 744   FALSE     1        258       7997           <NA>       NA
## 745   FALSE     1        285       7997           <NA>       NA
## 746   FALSE     1       1026       7997     QKN22508.1    0.979
## 747   FALSE     1        219       7997           <NA>       NA
## 748    TRUE     1       2142       7997     QKN22506.1    0.976
## 749   FALSE     1        741       7997     QKN22509.1    0.995
## 816   FALSE     1       2307       4458     QKN22456.1    0.990
## 817   FALSE     1        855       4458     QKN22455.1    0.989
## 818    TRUE     1        171       4458           <NA>       NA
## 819    TRUE     1        519       4458 XP_002083176.1    0.396
## 854    TRUE     1       1197       3938     QKN22497.1    0.992
## 855   FALSE     1        153       3938           <NA>       NA
## 856    TRUE     1        894       3938     QKN22496.1    0.993
## 857    TRUE     1        564       3938     QKN22498.1    0.984
## 858    TRUE     1        903       3938     QKN22499.1    0.858
## 893    TRUE     1        276      20719           <NA>       NA
## 894    TRUE     1        180      20719           <NA>       NA
## 895   FALSE     1       1032      20719     QKN22479.1    0.916
## 896    TRUE     1        195      20719           <NA>       NA
## 897   FALSE     1       5223      20719     QKN22478.1    0.977
## 898    TRUE     1        666      20719     QKN22517.1    0.977
## 899   FALSE     1        165      20719           <NA>       NA
## 900    TRUE     1       1107      20719     QKN22483.1    0.981
## 901   FALSE     1        162      20719           <NA>       NA
## 902    TRUE     1       4017      20719     QKN22518.1    0.878
## 903   FALSE     1       1014      20719     QKN22519.1    0.991
## 904    TRUE     1       1905      20719           <NA>       NA
## 905    TRUE     1       1206      20719 YP_009345656.1    0.289
## 906   FALSE     1       1821      20719     QKN22481.1    0.978
## 1054   TRUE     1       1041      12802 YP_009345706.1    0.296
## 1055  FALSE     1       1074      12802           <NA>       NA
## 1056  FALSE     1        381      12802           <NA>       NA
## 1057   TRUE     1       2232      12802     QKN22485.1    0.979
## 1058  FALSE     1        975      12802     QKN22492.1    0.933
## 1059  FALSE     1        969      12802     QKN22491.1    0.987
## 1060   TRUE     1        978      12802     QKN22484.1    0.957
## 1061  FALSE     1        504      12802     QKN22490.1    1.000
## 1062  FALSE     1        282      12802           <NA>       NA
## 1063  FALSE     1       1014      12802     QKN22489.1    0.970
## 1064   TRUE     1        453      12802     QKN22487.1    0.993
## 1065  FALSE     1        696      12802     QKN22488.1    0.990
## 1066   TRUE     1        453      12802           <NA>       NA
## 1067   TRUE     1        606      12802           <NA>       NA
## 1068  FALSE     1        249      12482           <NA>       NA
## 1069   TRUE     1        948      12482     QKN22463.1    0.958
## 1070  FALSE     1        228      12482           <NA>       NA
## 1071  FALSE     1        810      12482     QKN22460.1    0.977
## 1072  FALSE     1        411      12482     QKN22459.1    0.978
## 1073  FALSE     1       1419      12482     QKN22458.1    0.987
## 1074  FALSE     1       5274      12482     QKN22461.1    0.977
## 1075  FALSE     1        162      12482           <NA>       NA
## 1076  FALSE     1       1200      12482     QKN22457.1    0.990
## 1253  FALSE     1        231       1744     QKN22523.1    0.680
## 1254  FALSE     1        189       1744     QKN22523.1    0.677
## 1255  FALSE     1        753       1744     QKN22523.1    0.988
##      alignment_length mismatches gap_opens qstart qend sstart send
## 477               319         11         0      1  319    161  473
## 489               295          2         0      1  295    959 1252
## 564                NA         NA        NA     NA   NA     NA   NA
## 565                NA         NA        NA     NA   NA     NA   NA
## 617               571         33         0      1  571      1  570
## 618               489         25         0      1  489      1  473
## 619               266         15         0      1  266      1  259
## 620               122          3         0      1  122      1  122
## 621               146          3         0      8  153      2  147
## 622               248         12         0      1  248      1  238
## 623               296          8         0      1  296      1  291
## 624               209          4         0      1  209      1  207
## 625                NA         NA        NA     NA   NA     NA   NA
## 626               556         10         0      1  556      1  556
## 627                NA         NA        NA     NA   NA     NA   NA
## 628                47         26         0     41   87     38   84
## 629                NA         NA        NA     NA   NA     NA   NA
## 630                NA         NA        NA     NA   NA     NA   NA
## 631               347          9         0      1  347      1  346
## 632               589          4         0      1  589      1  588
## 633                NA         NA        NA     NA   NA     NA   NA
## 634                NA         NA        NA     NA   NA     NA   NA
## 635                NA         NA        NA     NA   NA     NA   NA
## 636                NA         NA        NA     NA   NA     NA   NA
## 637                NA         NA        NA     NA   NA     NA   NA
## 638               615        384         0     83  697     52  642
## 639                67         16         0      7   68     35  101
## 640                NA         NA        NA     NA   NA     NA   NA
## 641                NA         NA        NA     NA   NA     NA   NA
## 642                NA         NA        NA     NA   NA     NA   NA
## 643               457        297         0     92  548     78  532
## 644               287        185         0      3  281     13  299
## 645                NA         NA        NA     NA   NA     NA   NA
## 646               694         10         0      1  694      1  693
## 647                NA         NA        NA     NA   NA     NA   NA
## 648               244          1         0     30  273      1  244
## 721               166          0         0      1  166      1  166
## 722               102          0         0      1  102      1  102
## 723                73         10         0      6   78    314  385
## 724               463          5         0      1  463      1  462
## 725               334          3         0      1  334      1  334
## 726               704         13         0      1  704     30  728
## 727               276         11         0      1  276      1  269
## 728               292          2         0      1  292      1  292
## 729               194          2         0      1  194      1  194
## 730               235          3         0      1  235      1  233
## 731               119          5         0      1  119      1  116
## 741               416          5         0      1  416      1  412
## 742               302          0         0      1  302      1  302
## 743                NA         NA        NA     NA   NA     NA   NA
## 744                NA         NA        NA     NA   NA     NA   NA
## 745                NA         NA        NA     NA   NA     NA   NA
## 746               346          7         0      1  342      1  346
## 747                NA         NA        NA     NA   NA     NA   NA
## 748               714         17         0      1  714     19  727
## 749               247          1         0      1  247      1  247
## 816               769          8         0      1  769      1  769
## 817               285          3         0      1  285      1  285
## 818                NA         NA        NA     NA   NA     NA   NA
## 819               101         60         0     35  134     30  130
## 854               399          3         0      1  399      1  399
## 855                NA         NA        NA     NA   NA     NA   NA
## 856               298          2         0      1  298      1  298
## 857               188          3         0      1  188      1  188
## 858               333         43         0      1  301      1  333
## 893                NA         NA        NA     NA   NA     NA   NA
## 894                NA         NA        NA     NA   NA     NA   NA
## 895               349         29         0      1  344      1  349
## 896                NA         NA        NA     NA   NA     NA   NA
## 897              1747         40         0      1 1741      1 1747
## 898               222          5         0      1  222      1  218
## 899                NA         NA        NA     NA   NA     NA   NA
## 900               369          7         0      1  369      1  369
## 901                NA         NA        NA     NA   NA     NA   NA
## 902              1271        153         0     39 1309      1 1252
## 903               338          3         0      1  338      1  338
## 904                NA         NA        NA     NA   NA     NA   NA
## 905               389        247         0      9  397     11  358
## 906               617         13         0      1  607      1  617
## 1054              347        235         0      3  337      5  351
## 1055               NA         NA        NA     NA   NA     NA   NA
## 1056               NA         NA        NA     NA   NA     NA   NA
## 1057              744         15         0      1  744      1  737
## 1058              316         21         0      1  314      1  316
## 1059              323          4         0      1  323      1  320
## 1060              326         14         0      1  326      1  326
## 1061              168          0         0      1  168      1  168
## 1062               NA         NA        NA     NA   NA     NA   NA
## 1063              338         10         0      1  338      1  335
## 1064              151          1         0      1  151      1  151
## 1065              104          1         0    129  232      1  104
## 1066               NA         NA        NA     NA   NA     NA   NA
## 1067               NA         NA        NA     NA   NA     NA   NA
## 1068               NA         NA        NA     NA   NA     NA   NA
## 1069              316         13         0      1  316      1  316
## 1070               NA         NA        NA     NA   NA     NA   NA
## 1071              270          6         0      1  270      1  270
## 1072              137          3         0      1  137      1  137
## 1073              473          6         0      1  473      1  473
## 1074             1762         40         0      1 1758      1 1762
## 1075               NA         NA        NA     NA   NA     NA   NA
## 1076              400          4         0      1  400      1  398
## 1253               73         22         0      1   69     54  126
## 1254               57         17         0      4   58    103  159
## 1255              251          3         0      1  251      1  251
##             evalue bitscore
## 477  1.310000e-201      624
## 489  9.019000e-192      596
## 564             NA       NA
## 565             NA       NA
## 617  2.574082e-321      984
## 618  2.527000e-295      904
## 619  1.383000e-152      480
## 620   4.059000e-75      248
## 621   9.200000e-93      301
## 622  4.351000e-153      480
## 623  1.220000e-182      568
## 624  7.725000e-122      388
## 625             NA       NA
## 626   0.000000e+00     1126
## 627             NA       NA
## 628   9.676000e-06       47
## 629             NA       NA
## 630             NA       NA
## 631  2.014000e-218      674
## 632   0.000000e+00     1204
## 633             NA       NA
## 634             NA       NA
## 635             NA       NA
## 636             NA       NA
## 637             NA       NA
## 638  1.353000e-100      349
## 639   1.973000e-22       94
## 640             NA       NA
## 641             NA       NA
## 642             NA       NA
## 643   6.465000e-67      244
## 644   8.672000e-39      151
## 645             NA       NA
## 646   0.000000e+00     1401
## 647             NA       NA
## 648  2.505000e-145      459
## 721  6.913000e-109      348
## 722   4.516000e-62      209
## 723   1.145000e-24      100
## 724  1.210000e-303      927
## 725  1.297000e-223      689
## 726   0.000000e+00     1390
## 727  3.561000e-157      494
## 728  1.155000e-196      608
## 729  1.959000e-125      397
## 730  9.427000e-151      473
## 731   6.826000e-58      198
## 741  9.498000e-269      824
## 742  1.090000e-191      595
## 743             NA       NA
## 744             NA       NA
## 745             NA       NA
## 746  1.448000e-217      672
## 747             NA       NA
## 748   0.000000e+00     1448
## 749  1.724000e-157      493
## 816   0.000000e+00     1535
## 817  2.571000e-182      567
## 818             NA       NA
## 819   1.455000e-21       96
## 854  4.731000e-270      826
## 855             NA       NA
## 856  2.112000e-190      591
## 857  5.552000e-119      378
## 858  2.016000e-175      548
## 893             NA       NA
## 894             NA       NA
## 895  3.993000e-207      642
## 896             NA       NA
## 897   0.000000e+00     3397
## 898  1.080000e-138      437
## 899             NA       NA
## 900  7.696000e-244      749
## 901             NA       NA
## 902   0.000000e+00     2224
## 903  1.898000e-225      694
## 904             NA       NA
## 905   7.797000e-40      159
## 906   0.000000e+00     1211
## 1054  5.965000e-32      134
## 1055            NA       NA
## 1056            NA       NA
## 1057  0.000000e+00     1435
## 1058 6.033000e-162      510
## 1059 7.103000e-197      611
## 1060 5.392000e-209      646
## 1061 1.043000e-108      347
## 1062            NA       NA
## 1063 1.570000e-210      651
## 1064  2.352000e-95      308
## 1065  2.531000e-58      206
## 1066            NA       NA
## 1067            NA       NA
## 1068            NA       NA
## 1069 3.813000e-207      640
## 1070            NA       NA
## 1071 4.182000e-178      554
## 1072  3.896000e-77      255
## 1073 1.345000e-309      945
## 1074  0.000000e+00     3413
## 1075            NA       NA
## 1076 9.075000e-260      797
## 1253  4.144000e-17       79
## 1254  8.819000e-15       71
## 1255 2.666000e-153      481
##                                                                                    annotation
## 477                             putative protein 21 [Drosophila-associated filamentous virus]
## 489                      putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 564                                                                                      <NA>
## 565                                                                                      <NA>
## 617                             putative protein 20 [Drosophila-associated filamentous virus]
## 618                             putative protein 21 [Drosophila-associated filamentous virus]
## 619                             putative protein 22 [Drosophila-associated filamentous virus]
## 620                             putative protein 18 [Drosophila-associated filamentous virus]
## 621                             putative protein 17 [Drosophila-associated filamentous virus]
## 622                                  putative ORF19 [Drosophila-associated filamentous virus]
## 623                             putative protein 15 [Drosophila-associated filamentous virus]
## 624                             putative protein 13 [Drosophila-associated filamentous virus]
## 625                                                                                      <NA>
## 626                    putative JmJC domain protein [Drosophila-associated filamentous virus]
## 627                                                                                      <NA>
## 628                  hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 629                                                                                      <NA>
## 630                                                                                      <NA>
## 631                             putative protein 11 [Drosophila-associated filamentous virus]
## 632                                 putative ORF107 [Drosophila-associated filamentous virus]
## 633                                                                                      <NA>
## 634                                                                                      <NA>
## 635                                                                                      <NA>
## 636                                                                                      <NA>
## 637                                                                                      <NA>
## 638                  hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 639                             putative protein 53 [Drosophila-associated filamentous virus]
## 640                                                                                      <NA>
## 641                                                                                      <NA>
## 642                                                                                      <NA>
## 643                                  putative ATPase [Leptopilina boulardi filamentous virus]
## 644                  hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 645                                                                                      <NA>
## 646                             putative protein 65 [Drosophila-associated filamentous virus]
## 647                                                                                      <NA>
## 648                             putative protein 19 [Drosophila-associated filamentous virus]
## 721                             putative protein 61 [Drosophila-associated filamentous virus]
## 722                             putative protein 47 [Drosophila-associated filamentous virus]
## 723                             putative protein 45 [Drosophila-associated filamentous virus]
## 724                             putative protein 48 [Drosophila-associated filamentous virus]
## 725                                  putative ORF43 [Drosophila-associated filamentous virus]
## 726                         putative ORF96, partial [Drosophila-associated filamentous virus]
## 727                             putative protein 56 [Drosophila-associated filamentous virus]
## 728                             putative protein 57 [Drosophila-associated filamentous virus]
## 729                             putative protein 58 [Drosophila-associated filamentous virus]
## 730                      putative Ac81-like protein [Drosophila-associated filamentous virus]
## 731                             putative protein 60 [Drosophila-associated filamentous virus]
## 741  putative lecithine cholesterol acyltransferase [Drosophila-associated filamentous virus]
## 742                             putative protein 51 [Drosophila-associated filamentous virus]
## 743                                                                                      <NA>
## 744                                                                                      <NA>
## 745                                                                                      <NA>
## 746                             putative protein 54 [Drosophila-associated filamentous virus]
## 747                                                                                      <NA>
## 748                               PIF1-like protein [Drosophila-associated filamentous virus]
## 749                             putative protein 55 [Drosophila-associated filamentous virus]
## 816                                P74-like protein [Drosophila-associated filamentous virus]
## 817                              putative protein 1 [Drosophila-associated filamentous virus]
## 818                                                                                      <NA>
## 819                                                          lysozyme X [Drosophila simulans]
## 854                                  putative ORF20 [Drosophila-associated filamentous virus]
## 855                                                                                      <NA>
## 856                             putative protein 42 [Drosophila-associated filamentous virus]
## 857                             putative protein 44 [Drosophila-associated filamentous virus]
## 858                             putative protein 45 [Drosophila-associated filamentous virus]
## 893                                                                                      <NA>
## 894                                                                                      <NA>
## 895                             putative protein 25 [Drosophila-associated filamentous virus]
## 896                                                                                      <NA>
## 897                               PIF2-like protein [Drosophila-associated filamentous virus]
## 898                             putative protein 67 [Drosophila-associated filamentous virus]
## 899                                                                                      <NA>
## 900                             putative protein 29 [Drosophila-associated filamentous virus]
## 901                                                                                      <NA>
## 902                      putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 903                             putative protein 69 [Drosophila-associated filamentous virus]
## 904                                                                                      <NA>
## 905                  hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 906                             putative protein 27 [Drosophila-associated filamentous virus]
## 1054                hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 1055                                                                                     <NA>
## 1056                                                                                     <NA>
## 1057                                  putative ORF5 [Drosophila-associated filamentous virus]
## 1058                            putative protein 38 [Drosophila-associated filamentous virus]
## 1059                            putative protein 37 [Drosophila-associated filamentous virus]
## 1060                            putative protein 30 [Drosophila-associated filamentous virus]
## 1061                            putative protein 36 [Drosophila-associated filamentous virus]
## 1062                                                                                     <NA>
## 1063                                 putative ORF24 [Drosophila-associated filamentous virus]
## 1064                            putative protein 33 [Drosophila-associated filamentous virus]
## 1065                            putative protein 34 [Drosophila-associated filamentous virus]
## 1066                                                                                     <NA>
## 1067                                                                                     <NA>
## 1068                                                                                     <NA>
## 1069                                  putative ORF1 [Drosophila-associated filamentous virus]
## 1070                                                                                     <NA>
## 1071                  putative nudix domain protein [Drosophila-associated filamentous virus]
## 1072                             putative protein 5 [Drosophila-associated filamentous virus]
## 1073                             putative protein 4 [Drosophila-associated filamentous virus]
## 1074                             putative protein 7 [Drosophila-associated filamentous virus]
## 1075                                                                                     <NA>
## 1076                                  putative ORF2 [Drosophila-associated filamentous virus]
## 1253                           ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1254                           ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1255                           ODV-E66-like protein [Drosophila-associated filamentous virus]

Drosophila Vesantovirus

D.mel

# Contig names for the "Vesantovirus_D.mel" set: the main set first, then the
# single contig that is passed separately as `contig_set_unassigned`.
contig_set <- paste0(
  "contig_",
  c(2753, 20176, 3903, 4179, 22788, 15495, 3119, 2457, 22865, 11850)
)
contig_set_unassigned <- paste0("contig_", c(8677))

# Record both sets under this virus name so the corresponding lines can be
# fused later on.
virus_list[["Vesantovirus_D.mel"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its return value is kept in
# `res`. NOTE(review): it presumably also prints the ORF/annotation table that
# follows this call in the rendered output -- confirm against its definition.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Vesantovirus_D.mel"
)
##                      orf_name        seqid    source type start  end score
## 106  contig_11850_1167_1355_+ contig_11850 getorf_JV gene  1167 1355     .
## 107    contig_11850_180_350_+ contig_11850 getorf_JV gene   180  350     .
## 108    contig_11850_482_676_+ contig_11850 getorf_JV gene   482  676     .
## 266    contig_15495_272_454_+ contig_15495 getorf_JV gene   272  454     .
## 267   contig_15495_953_1141_+ contig_15495 getorf_JV gene   953 1141     .
## 503   contig_20176_336_1028_- contig_20176 getorf_JV gene   336 1028     .
## 750  contig_22788_1197_4418_+ contig_22788 getorf_JV gene  1197 4418     .
## 751    contig_22788_188_937_+ contig_22788 getorf_JV gene   188  937     .
## 752      contig_22788_3_158_- contig_22788 getorf_JV gene     3  158     .
## 753  contig_22788_4495_4677_+ contig_22788 getorf_JV gene  4495 4677     .
## 754   contig_22788_943_1113_+ contig_22788 getorf_JV gene   943 1113     .
## 755  contig_22865_2320_2808_- contig_22865 getorf_JV gene  2320 2808     .
## 756    contig_22865_251_919_+ contig_22865 getorf_JV gene   251  919     .
## 757  contig_22865_2887_3303_+ contig_22865 getorf_JV gene  2887 3303     .
## 758  contig_22865_3311_3895_- contig_22865 getorf_JV gene  3311 3895     .
## 759  contig_22865_3941_4294_+ contig_22865 getorf_JV gene  3941 4294     .
## 760   contig_22865_895_2271_+ contig_22865 getorf_JV gene   895 2271     .
## 787       contig_2457_1_183_-  contig_2457 getorf_JV gene     1  183     .
## 788   contig_2457_1444_4692_-  contig_2457 getorf_JV gene  1444 4692     .
## 789    contig_2457_173_1441_+  contig_2457 getorf_JV gene   173 1441     .
## 790   contig_2457_4691_4843_+  contig_2457 getorf_JV gene  4691 4843     .
## 820    contig_2753_285_3890_-  contig_2753 getorf_JV gene   285 3890     .
## 850   contig_3119_1409_2581_-  contig_3119 getorf_JV gene  1409 2581     .
## 851   contig_3119_2591_2938_-  contig_3119 getorf_JV gene  2591 2938     .
## 852   contig_3119_3119_3409_-  contig_3119 getorf_JV gene  3119 3409     .
## 853      contig_3119_94_930_-  contig_3119 getorf_JV gene    94  930     .
## 907     contig_3903_18_1709_+  contig_3903 getorf_JV gene    18 1709     .
## 908   contig_3903_1876_2634_+  contig_3903 getorf_JV gene  1876 2634     .
## 909   contig_3903_2931_3113_+  contig_3903 getorf_JV gene  2931 3113     .
## 910   contig_3903_3151_3300_-  contig_3903 getorf_JV gene  3151 3300     .
## 911   contig_3903_3167_3316_+  contig_3903 getorf_JV gene  3167 3316     .
## 947   contig_4179_2109_2885_+  contig_4179 getorf_JV gene  2109 2885     .
## 948    contig_4179_278_1879_-  contig_4179 getorf_JV gene   278 1879     .
## 1187  contig_8677_1150_1299_-  contig_8677 getorf_JV gene  1150 1299     .
## 1188    contig_8677_116_295_-  contig_8677 getorf_JV gene   116  295     .
## 1189  contig_8677_1277_1780_+  contig_8677 getorf_JV gene  1277 1780     .
## 1190  contig_8677_1352_1780_+  contig_8677 getorf_JV gene  1352 1780     .
## 1191    contig_8677_334_621_-  contig_8677 getorf_JV gene   334  621     .
## 1192    contig_8677_334_891_-  contig_8677 getorf_JV gene   334  891     .
## 1193    contig_8677_620_781_+  contig_8677 getorf_JV gene   620  781     .
## 1194   contig_8677_884_1048_+  contig_8677 getorf_JV gene   884 1048     .
## 1195      contig_8677_9_356_-  contig_8677 getorf_JV gene     9  356     .
## 1196   contig_8677_963_1184_+  contig_8677 getorf_JV gene   963 1184     .
##      strand phase attributes seq_length     subject_id identity
## 106    TRUE     1        189       1484           <NA>       NA
## 107    TRUE     1        171       1484           <NA>       NA
## 108    TRUE     1        195       1484     QKT21494.1    0.984
## 266    TRUE     1        183       1249     QKT21493.1    1.000
## 267    TRUE     1        189       1249           <NA>       NA
## 503   FALSE     1        693       1063     QKT21491.1    0.987
## 750    TRUE     1       3222       4745     QKT21485.1    0.991
## 751    TRUE     1        750       4745 WP_007549166.1    0.376
## 752   FALSE     1        156       4745           <NA>       NA
## 753    TRUE     1        183       4745           <NA>       NA
## 754    TRUE     1        171       4745           <NA>       NA
## 755   FALSE     1        489       4294     QKT21502.1    1.000
## 756    TRUE     1        669       4294     QKT21500.1    1.000
## 757    TRUE     1        417       4294           <NA>       NA
## 758   FALSE     1        585       4294     QKT21499.1    1.000
## 759    TRUE     1        354       4294     QKT21477.1    0.674
## 760    TRUE     1       1377       4294     QKT21501.1    0.997
## 787   FALSE     1        183       4844           <NA>       NA
## 788   FALSE     1       3249       4844     AQN78642.1    0.988
## 789    TRUE     1       1269       4844     QKT21497.1    1.000
## 790    TRUE     1        153       4844           <NA>       NA
## 820   FALSE     1       3606       4419     QKT21484.1    0.981
## 850   FALSE     1       1173       3702     QKT21506.1    1.000
## 851   FALSE     1        348       3702           <NA>       NA
## 852   FALSE     1        291       3702           <NA>       NA
## 853   FALSE     1        837       3702     QKT21522.1    1.000
## 907    TRUE     1       1692       3316     QKT21488.1    0.936
## 908    TRUE     1        759       3316     QKT21525.1    1.000
## 909    TRUE     1        183       3316           <NA>       NA
## 910   FALSE     1        150       3316     QKT21524.1    0.880
## 911    TRUE     1        150       3316           <NA>       NA
## 947    TRUE     1        777       3153     QKT21494.1    1.000
## 948   FALSE     1       1602       3153     QKT21493.1    1.000
## 1187  FALSE     2        150       1830           <NA>       NA
## 1188  FALSE     1        180       1830           <NA>       NA
## 1189   TRUE     2        504       1830           <NA>       NA
## 1190   TRUE     1        429       1830           <NA>       NA
## 1191  FALSE     1        288       1830           <NA>       NA
## 1192  FALSE     2        558       1830           <NA>       NA
## 1193   TRUE     1        162       1830           <NA>       NA
## 1194   TRUE     2        165       1830           <NA>       NA
## 1195  FALSE     2        348       1830           <NA>       NA
## 1196   TRUE     1        222       1830           <NA>       NA
##      alignment_length mismatches gap_opens qstart qend sstart send     evalue
## 106                NA         NA        NA     NA   NA     NA   NA         NA
## 107                NA         NA        NA     NA   NA     NA   NA         NA
## 108                65          1         0      1   65    195  259  4.251e-36
## 266                61          0         0      1   61    474  534  3.467e-36
## 267                NA         NA        NA     NA   NA     NA   NA         NA
## 503               231          3         0      1  231      1  231 4.847e-146
## 750              1074         10         0      1 1074      1 1074  0.000e+00
## 751                93         56         0      1   91    167  259  5.418e-09
## 752                NA         NA        NA     NA   NA     NA   NA         NA
## 753                NA         NA        NA     NA   NA     NA   NA         NA
## 754                NA         NA        NA     NA   NA     NA   NA         NA
## 755               163          0         0      1  163      1  163 4.686e-103
## 756               223          0         0      1  223      1  223 1.391e-117
## 757                NA         NA        NA     NA   NA     NA   NA         NA
## 758               195          0         0      1  195      1  195 1.131e-112
## 759                79         24         0     40  118    139  214  5.457e-21
## 760               459          1         0      1  459      1  459 2.508e-293
## 787                NA         NA        NA     NA   NA     NA   NA         NA
## 788              1083         13         0      1 1083      1 1083  0.000e+00
## 789               423          0         0      1  423      1  423 4.661e-281
## 790                NA         NA        NA     NA   NA     NA   NA         NA
## 820              1202         23         0      1 1202      1 1202  0.000e+00
## 850               391          0         0      1  391      1  391 2.261e-263
## 851                NA         NA        NA     NA   NA     NA   NA         NA
## 852                NA         NA        NA     NA   NA     NA   NA         NA
## 853               279          0         0      1  279      1  279 4.857e-185
## 907               564         36         0      1  564      1  564  0.000e+00
## 908               253          0         0      1  253      1  253 9.866e-169
## 909                NA         NA        NA     NA   NA     NA   NA         NA
## 910                50          6         0      1   49      1   50  1.081e-17
## 911                NA         NA        NA     NA   NA     NA   NA         NA
## 947               259          0         0      1  259      1  259 3.279e-171
## 948               534          0         0      1  534      1  534  0.000e+00
## 1187               NA         NA        NA     NA   NA     NA   NA         NA
## 1188               NA         NA        NA     NA   NA     NA   NA         NA
## 1189               NA         NA        NA     NA   NA     NA   NA         NA
## 1190               NA         NA        NA     NA   NA     NA   NA         NA
## 1191               NA         NA        NA     NA   NA     NA   NA         NA
## 1192               NA         NA        NA     NA   NA     NA   NA         NA
## 1193               NA         NA        NA     NA   NA     NA   NA         NA
## 1194               NA         NA        NA     NA   NA     NA   NA         NA
## 1195               NA         NA        NA     NA   NA     NA   NA         NA
## 1196               NA         NA        NA     NA   NA     NA   NA         NA
##      bitscore
## 106        NA
## 107        NA
## 108       132
## 266       132
## 267        NA
## 503       459
## 750      2120
## 751        62
## 752        NA
## 753        NA
## 754        NA
## 755       331
## 756       376
## 757        NA
## 758       361
## 759        92
## 760       897
## 787        NA
## 788      2209
## 789       859
## 790        NA
## 820      2463
## 850       807
## 851        NA
## 852        NA
## 853       574
## 907      1088
## 908       526
## 909        NA
## 910        79
## 911        NA
## 947       533
## 948      1090
## 1187       NA
## 1188       NA
## 1189       NA
## 1190       NA
## 1191       NA
## 1192       NA
## 1193       NA
## 1194       NA
## 1195       NA
## 1196       NA
##                                                                 annotation
## 106                                                                   <NA>
## 107                                                                   <NA>
## 108                                  putative glycoprotein [Vesanto virus]
## 266                               putative DUF3472 protein [Vesanto virus]
## 267                                                                   <NA>
## 503                                 hypothetical protein 1 [Vesanto virus]
## 750                            putative structural protein [Vesanto virus]
## 751  hypothetical protein [Wolbachia endosymbiont of Drosophila ananassae]
## 752                                                                   <NA>
## 753                                                                   <NA>
## 754                                                                   <NA>
## 755                                  putative coat protein [Vesanto virus]
## 756                                 hypothetical protein 3 [Vesanto virus]
## 757                                                                   <NA>
## 758                                 hypothetical protein 2 [Vesanto virus]
## 759                                   hypothetical protein [Vesanto virus]
## 760                                   putative NS1 protein [Vesanto virus]
## 787                                                                   <NA>
## 788                            putative structural protein [Vesanto virus]
## 789                                putative capsid protein [Vesanto virus]
## 790                                                                   <NA>
## 820                              putative DNA polymerase B [Vesanto virus]
## 850                       putative nuclease domain protein [Vesanto virus]
## 851                                                                   <NA>
## 852                                                                   <NA>
## 853                                  putative glycoprotein [Vesanto virus]
## 907                               putative DUF3472 protein [Vesanto virus]
## 908                                  putative glycoprotein [Vesanto virus]
## 909                                                                   <NA>
## 910                               putative DUF3472 protein [Vesanto virus]
## 911                                                                   <NA>
## 947                                  putative glycoprotein [Vesanto virus]
## 948                               putative DUF3472 protein [Vesanto virus]
## 1187                                                                  <NA>
## 1188                                                                  <NA>
## 1189                                                                  <NA>
## 1190                                                                  <NA>
## 1191                                                                  <NA>
## 1192                                                                  <NA>
## 1193                                                                  <NA>
## 1194                                                                  <NA>
## 1195                                                                  <NA>
## 1196                                                                  <NA>
## Saving 7 x 5 in image
# Display the first element of `res` (single-bracket indexing keeps it wrapped
# in a one-element list). No text is echoed for it below -- presumably it is
# the plot object returned by the preceding plot_orfs() call; TODO confirm.
res[1]
## [[1]]

# Display the second element of `res`: the table of ORFs echoed below.
res[2]
## [[1]]
##                      orf_name        seqid    source type start  end score
## 106  contig_11850_1167_1355_+ contig_11850 getorf_JV gene  1167 1355     .
## 107    contig_11850_180_350_+ contig_11850 getorf_JV gene   180  350     .
## 108    contig_11850_482_676_+ contig_11850 getorf_JV gene   482  676     .
## 266    contig_15495_272_454_+ contig_15495 getorf_JV gene   272  454     .
## 267   contig_15495_953_1141_+ contig_15495 getorf_JV gene   953 1141     .
## 503   contig_20176_336_1028_- contig_20176 getorf_JV gene   336 1028     .
## 750  contig_22788_1197_4418_+ contig_22788 getorf_JV gene  1197 4418     .
## 751    contig_22788_188_937_+ contig_22788 getorf_JV gene   188  937     .
## 752      contig_22788_3_158_- contig_22788 getorf_JV gene     3  158     .
## 753  contig_22788_4495_4677_+ contig_22788 getorf_JV gene  4495 4677     .
## 754   contig_22788_943_1113_+ contig_22788 getorf_JV gene   943 1113     .
## 755  contig_22865_2320_2808_- contig_22865 getorf_JV gene  2320 2808     .
## 756    contig_22865_251_919_+ contig_22865 getorf_JV gene   251  919     .
## 757  contig_22865_2887_3303_+ contig_22865 getorf_JV gene  2887 3303     .
## 758  contig_22865_3311_3895_- contig_22865 getorf_JV gene  3311 3895     .
## 759  contig_22865_3941_4294_+ contig_22865 getorf_JV gene  3941 4294     .
## 760   contig_22865_895_2271_+ contig_22865 getorf_JV gene   895 2271     .
## 787       contig_2457_1_183_-  contig_2457 getorf_JV gene     1  183     .
## 788   contig_2457_1444_4692_-  contig_2457 getorf_JV gene  1444 4692     .
## 789    contig_2457_173_1441_+  contig_2457 getorf_JV gene   173 1441     .
## 790   contig_2457_4691_4843_+  contig_2457 getorf_JV gene  4691 4843     .
## 820    contig_2753_285_3890_-  contig_2753 getorf_JV gene   285 3890     .
## 850   contig_3119_1409_2581_-  contig_3119 getorf_JV gene  1409 2581     .
## 851   contig_3119_2591_2938_-  contig_3119 getorf_JV gene  2591 2938     .
## 852   contig_3119_3119_3409_-  contig_3119 getorf_JV gene  3119 3409     .
## 853      contig_3119_94_930_-  contig_3119 getorf_JV gene    94  930     .
## 907     contig_3903_18_1709_+  contig_3903 getorf_JV gene    18 1709     .
## 908   contig_3903_1876_2634_+  contig_3903 getorf_JV gene  1876 2634     .
## 909   contig_3903_2931_3113_+  contig_3903 getorf_JV gene  2931 3113     .
## 910   contig_3903_3151_3300_-  contig_3903 getorf_JV gene  3151 3300     .
## 911   contig_3903_3167_3316_+  contig_3903 getorf_JV gene  3167 3316     .
## 947   contig_4179_2109_2885_+  contig_4179 getorf_JV gene  2109 2885     .
## 948    contig_4179_278_1879_-  contig_4179 getorf_JV gene   278 1879     .
## 1187  contig_8677_1150_1299_-  contig_8677 getorf_JV gene  1150 1299     .
## 1188    contig_8677_116_295_-  contig_8677 getorf_JV gene   116  295     .
## 1189  contig_8677_1277_1780_+  contig_8677 getorf_JV gene  1277 1780     .
## 1190  contig_8677_1352_1780_+  contig_8677 getorf_JV gene  1352 1780     .
## 1191    contig_8677_334_621_-  contig_8677 getorf_JV gene   334  621     .
## 1192    contig_8677_334_891_-  contig_8677 getorf_JV gene   334  891     .
## 1193    contig_8677_620_781_+  contig_8677 getorf_JV gene   620  781     .
## 1194   contig_8677_884_1048_+  contig_8677 getorf_JV gene   884 1048     .
## 1195      contig_8677_9_356_-  contig_8677 getorf_JV gene     9  356     .
## 1196   contig_8677_963_1184_+  contig_8677 getorf_JV gene   963 1184     .
##      strand phase attributes seq_length     subject_id identity
## 106    TRUE     1        189       1484           <NA>       NA
## 107    TRUE     1        171       1484           <NA>       NA
## 108    TRUE     1        195       1484     QKT21494.1    0.984
## 266    TRUE     1        183       1249     QKT21493.1    1.000
## 267    TRUE     1        189       1249           <NA>       NA
## 503   FALSE     1        693       1063     QKT21491.1    0.987
## 750    TRUE     1       3222       4745     QKT21485.1    0.991
## 751    TRUE     1        750       4745 WP_007549166.1    0.376
## 752   FALSE     1        156       4745           <NA>       NA
## 753    TRUE     1        183       4745           <NA>       NA
## 754    TRUE     1        171       4745           <NA>       NA
## 755   FALSE     1        489       4294     QKT21502.1    1.000
## 756    TRUE     1        669       4294     QKT21500.1    1.000
## 757    TRUE     1        417       4294           <NA>       NA
## 758   FALSE     1        585       4294     QKT21499.1    1.000
## 759    TRUE     1        354       4294     QKT21477.1    0.674
## 760    TRUE     1       1377       4294     QKT21501.1    0.997
## 787   FALSE     1        183       4844           <NA>       NA
## 788   FALSE     1       3249       4844     AQN78642.1    0.988
## 789    TRUE     1       1269       4844     QKT21497.1    1.000
## 790    TRUE     1        153       4844           <NA>       NA
## 820   FALSE     1       3606       4419     QKT21484.1    0.981
## 850   FALSE     1       1173       3702     QKT21506.1    1.000
## 851   FALSE     1        348       3702           <NA>       NA
## 852   FALSE     1        291       3702           <NA>       NA
## 853   FALSE     1        837       3702     QKT21522.1    1.000
## 907    TRUE     1       1692       3316     QKT21488.1    0.936
## 908    TRUE     1        759       3316     QKT21525.1    1.000
## 909    TRUE     1        183       3316           <NA>       NA
## 910   FALSE     1        150       3316     QKT21524.1    0.880
## 911    TRUE     1        150       3316           <NA>       NA
## 947    TRUE     1        777       3153     QKT21494.1    1.000
## 948   FALSE     1       1602       3153     QKT21493.1    1.000
## 1187  FALSE     2        150       1830           <NA>       NA
## 1188  FALSE     1        180       1830           <NA>       NA
## 1189   TRUE     2        504       1830           <NA>       NA
## 1190   TRUE     1        429       1830           <NA>       NA
## 1191  FALSE     1        288       1830           <NA>       NA
## 1192  FALSE     2        558       1830           <NA>       NA
## 1193   TRUE     1        162       1830           <NA>       NA
## 1194   TRUE     2        165       1830           <NA>       NA
## 1195  FALSE     2        348       1830           <NA>       NA
## 1196   TRUE     1        222       1830           <NA>       NA
##      alignment_length mismatches gap_opens qstart qend sstart send     evalue
## 106                NA         NA        NA     NA   NA     NA   NA         NA
## 107                NA         NA        NA     NA   NA     NA   NA         NA
## 108                65          1         0      1   65    195  259  4.251e-36
## 266                61          0         0      1   61    474  534  3.467e-36
## 267                NA         NA        NA     NA   NA     NA   NA         NA
## 503               231          3         0      1  231      1  231 4.847e-146
## 750              1074         10         0      1 1074      1 1074  0.000e+00
## 751                93         56         0      1   91    167  259  5.418e-09
## 752                NA         NA        NA     NA   NA     NA   NA         NA
## 753                NA         NA        NA     NA   NA     NA   NA         NA
## 754                NA         NA        NA     NA   NA     NA   NA         NA
## 755               163          0         0      1  163      1  163 4.686e-103
## 756               223          0         0      1  223      1  223 1.391e-117
## 757                NA         NA        NA     NA   NA     NA   NA         NA
## 758               195          0         0      1  195      1  195 1.131e-112
## 759                79         24         0     40  118    139  214  5.457e-21
## 760               459          1         0      1  459      1  459 2.508e-293
## 787                NA         NA        NA     NA   NA     NA   NA         NA
## 788              1083         13         0      1 1083      1 1083  0.000e+00
## 789               423          0         0      1  423      1  423 4.661e-281
## 790                NA         NA        NA     NA   NA     NA   NA         NA
## 820              1202         23         0      1 1202      1 1202  0.000e+00
## 850               391          0         0      1  391      1  391 2.261e-263
## 851                NA         NA        NA     NA   NA     NA   NA         NA
## 852                NA         NA        NA     NA   NA     NA   NA         NA
## 853               279          0         0      1  279      1  279 4.857e-185
## 907               564         36         0      1  564      1  564  0.000e+00
## 908               253          0         0      1  253      1  253 9.866e-169
## 909                NA         NA        NA     NA   NA     NA   NA         NA
## 910                50          6         0      1   49      1   50  1.081e-17
## 911                NA         NA        NA     NA   NA     NA   NA         NA
## 947               259          0         0      1  259      1  259 3.279e-171
## 948               534          0         0      1  534      1  534  0.000e+00
## 1187               NA         NA        NA     NA   NA     NA   NA         NA
## 1188               NA         NA        NA     NA   NA     NA   NA         NA
## 1189               NA         NA        NA     NA   NA     NA   NA         NA
## 1190               NA         NA        NA     NA   NA     NA   NA         NA
## 1191               NA         NA        NA     NA   NA     NA   NA         NA
## 1192               NA         NA        NA     NA   NA     NA   NA         NA
## 1193               NA         NA        NA     NA   NA     NA   NA         NA
## 1194               NA         NA        NA     NA   NA     NA   NA         NA
## 1195               NA         NA        NA     NA   NA     NA   NA         NA
## 1196               NA         NA        NA     NA   NA     NA   NA         NA
##      bitscore
## 106        NA
## 107        NA
## 108       132
## 266       132
## 267        NA
## 503       459
## 750      2120
## 751        62
## 752        NA
## 753        NA
## 754        NA
## 755       331
## 756       376
## 757        NA
## 758       361
## 759        92
## 760       897
## 787        NA
## 788      2209
## 789       859
## 790        NA
## 820      2463
## 850       807
## 851        NA
## 852        NA
## 853       574
## 907      1088
## 908       526
## 909        NA
## 910        79
## 911        NA
## 947       533
## 948      1090
## 1187       NA
## 1188       NA
## 1189       NA
## 1190       NA
## 1191       NA
## 1192       NA
## 1193       NA
## 1194       NA
## 1195       NA
## 1196       NA
##                                                                 annotation
## 106                                                                   <NA>
## 107                                                                   <NA>
## 108                                  putative glycoprotein [Vesanto virus]
## 266                               putative DUF3472 protein [Vesanto virus]
## 267                                                                   <NA>
## 503                                 hypothetical protein 1 [Vesanto virus]
## 750                            putative structural protein [Vesanto virus]
## 751  hypothetical protein [Wolbachia endosymbiont of Drosophila ananassae]
## 752                                                                   <NA>
## 753                                                                   <NA>
## 754                                                                   <NA>
## 755                                  putative coat protein [Vesanto virus]
## 756                                 hypothetical protein 3 [Vesanto virus]
## 757                                                                   <NA>
## 758                                 hypothetical protein 2 [Vesanto virus]
## 759                                   hypothetical protein [Vesanto virus]
## 760                                   putative NS1 protein [Vesanto virus]
## 787                                                                   <NA>
## 788                            putative structural protein [Vesanto virus]
## 789                                putative capsid protein [Vesanto virus]
## 790                                                                   <NA>
## 820                              putative DNA polymerase B [Vesanto virus]
## 850                       putative nuclease domain protein [Vesanto virus]
## 851                                                                   <NA>
## 852                                                                   <NA>
## 853                                  putative glycoprotein [Vesanto virus]
## 907                               putative DUF3472 protein [Vesanto virus]
## 908                                  putative glycoprotein [Vesanto virus]
## 909                                                                   <NA>
## 910                               putative DUF3472 protein [Vesanto virus]
## 911                                                                   <NA>
## 947                                  putative glycoprotein [Vesanto virus]
## 948                               putative DUF3472 protein [Vesanto virus]
## 1187                                                                  <NA>
## 1188                                                                  <NA>
## 1189                                                                  <NA>
## 1190                                                                  <NA>
## 1191                                                                  <NA>
## 1192                                                                  <NA>
## 1193                                                                  <NA>
## 1194                                                                  <NA>
## 1195                                                                  <NA>
## 1196                                                                  <NA>

new RNA viruses

prepare data

import all wta contigs and their gff

# Load the WTA contig sequences as a BStringSet (the file name indicates that
# the unassigned contigs are included) and preview the first records.
contigs_wta <- readBStringSet(
  filepath = "../sequences/wta_final_contigs_with_unassigned.fa"
)
head(contigs_wta)
## BStringSet object of length 6:
##     width seq                                               names               
## [1]  1662 CGAGATACATCGGTGACTGGAGG...ACACTCAATCACACAAAAGAAAT contig_10017
## [2]  1659 CAATAAAGATAAGAATGCAAACA...TTTGTTTCAATATAATTTTTGAA contig_10041
## [3]  1651 AATATTTGTGCCAAAAGAGCATC...TATACAATAAGACTTACCATACT contig_10108
## [4]  1621 CAAGAGTATCGATCTAAAATTAA...GAATAAAGCTGGTTTAAAGTCCC contig_10399
## [5]  1613 AGGAGAAGGAGAGACTAAAAAGC...TCGAAGCGTAAGGGAAAAGGAGC contig_10471
## [6]  1587 GAATAAATTATGGTGTAGTAAGA...AAAGCATATCTAAAGTACGCTAT contig_10707
# Import the ORF predictions (GFF) of the WTA contigs together with the two
# getorf predictions made on the unassigned contigs, and stack them into one
# table with standard GFF column names.
gff_columns <- c(
  "seqid", "source", "type", "start", "end", "score", "strand", "phase",
  "attributes"
)

gff_wta <- read.table("../sequences/wta_final_contigs.gff")
# add unassigned contigs
gff_wta_unassigned0 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option0.gff")
gff_wta_unassigned1 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option1.gff")

names(gff_wta) <- gff_columns
names(gff_wta_unassigned0) <- gff_columns
names(gff_wta_unassigned1) <- gff_columns

# The phase column is reused to record which getorf option produced the ORF:
# phase 1 = option 1, phase 2 = option 0.
gff_wta$phase <- 1
gff_wta_unassigned1$phase <- 1
gff_wta_unassigned0$phase <- 2

gff_wta <- rbind(gff_wta, gff_wta_unassigned0, gff_wta_unassigned1)

# Recode the strand as a "FALSE"/"TRUE" factor where TRUE means the "+" strand.
# The mapping is written out explicitly: relabelling the levels of
# as.factor(strand) depends on how the current locale sorts "+" and "-", and
# silently mislabels the data when only one strand is present.
if (!all(gff_wta$strand %in% c("+", "-"))) {
  stop("Unexpected strand value in GFF input: only '+' and '-' are supported",
       call. = FALSE)
}
gff_wta$strand <- factor(gff_wta$strand == "+", levels = c(FALSE, TRUE))
gff_wta$phase <- as.factor(gff_wta$phase)

head(gff_wta)

Add contig length

# Named vector of contig lengths, keyed by contig name.
contig_length <- setNames(width(contigs_wta), names(contigs_wta))
# Attach the length to every ORF row by matching seqid against the vector
# names. merge() keeps only the rows whose seqid has a matching contig.
gff_wta <- merge(
  x = gff_wta, y = contig_length,
  by.x = "seqid", by.y = "row.names"
)
# The merged length lands after the nine GFF columns, i.e. in position 10.
colnames(gff_wta)[10] <- "seq_length"

Add an orf id column

# Build the ORF identifier "<seqid>_<start>_<end>_<strand>", then turn the
# TRUE/FALSE strand flag into the "+" / "-" sign.
orf_names <- with(gff_wta, paste(seqid, start, end, strand, sep = "_"))
orf_names <- sub(
  pattern = "FALSE", "-",
  x = sub(pattern = "TRUE", "+", x = orf_names)
)
gff_wta$orf_name <- orf_names

import blastp results

# Column labels for the 12-column tabular blastp output.
# NOTE: the variable is still called `names` (same name as the base function)
# so that any later code reading it keeps working.
names <- c(
  "query_id", "subject_id", "identity", "alignment_length", "mismatches",
  "gap_opens", "qstart", "qend", "sstart", "send", "evalue", "bitscore"
)
# Read the blastp hits of the predicted ORFs and label the columns.
wta_blast <- setNames(
  read.table("../TABLES/wta_final_contigs_getorf.blastp.tab"),
  names
)
head(wta_blast)

import subject_id sequences (with informative names)

# Protein sequences of the blast subjects; their FASTA headers carry the
# accession followed by a descriptive annotation.
subject_id_seqs <- readBStringSet(
  filepath = "../sequences/wta_protein_homologs.fasta"
)
subject_id_seqs
## BStringSet object of length 2132:
##        width seq                                            names               
##    [1]   874 MQCPNQNHMLVNRAMVVAALDS...SLAQKLPCGGVVIQVIHNVYV NP_041191.1 RNA d...
##    [2]  1596 MAHFQQTMNTKVTEAGIGRNSL...VHKTAVNGSFAFCSIVKYLSD NP_056808.1 181 K...
##    [3]   458 MQFYYDTLLPGNSTILNEYDAV...VHKTAVNGSFAFCSIVKYLSD NP_056810.1 52KDa...
##    [4]  1648 MANINEQINNQRDAAASGRNNL...CLCKYLSDKRLFRSLYIDVSK NP_044577.1 186K ...
##    [5]  1601 MAQFQQTIDMQTLQAAAGRNSL...AFCSIIKYLSDKRLFRDLFFV NP_046151.1 unnam...
##    ...   ... ...
## [2128]   468 MSRYGFNNNRGAGQQQWRNFGP...PFKPVLRVKKFCSIDVKPVSM QMI58126.1 putati...
## [2129]   292 MAAPKSKFVFDFEKLKQTFVEI...PKIPVKCAANFLGTKAGSGKI XP_036671730.1 un...
## [2130]   292 MAAPKSKFVFDFEKLKQTFVEI...PKIPVKCAANFLGTKAGSGKI XP_036671731.1 un...
## [2131]   292 MAAPKSKFVFDFEKLKQTFVEI...PKIPVKCAANFLGTKAGSGKI XP_036671732.1 un...
## [2132]  1879 MDSLLDTSFTERFMSDPIYDGE...FRDDGTRRGGIHSRLGFVLVI YP_009976137.1 RN...
# Split every FASTA header on spaces: the first field is the accession
# (short name), the remaining fields joined back together are the annotation.
short_names <- unlist(lapply(strsplit(names(subject_id_seqs), " "), `[`, 1))
annotation <- unlist(lapply(
  strsplit(names(subject_id_seqs), " "),
  function(fields) paste0(fields[-1], collapse = " ")
))
# Lookup table: accession -> annotation.
df <- data.frame(short_names, annotation)

Add this information to blast output

# Left join: keep every blast hit and attach the annotation of its subject
# (NA when the subject accession is absent from the lookup table).
wta_blast2 <- merge(
  x = wta_blast, y = df,
  by.x = "subject_id", by.y = "short_names",
  all.x = TRUE, all.y = FALSE
)

Combine this to the gff

# Keep a single best hit per ORF (query_id): the smallest e-value, with ties
# (e.g. several hits reported at e-value 0) broken by the highest bit score
# instead of by the incidental row order of the merged table.
wta_blast2_besthit <- wta_blast2 %>%
  group_by(query_id) %>%
  arrange(evalue, dplyr::desc(bitscore)) %>%
  dplyr::slice(1) %>%
  ungroup()
# Left join onto the GFF: every ORF is kept, ORFs without a hit get NA in the
# blast columns.
gff_wta2 <- merge(
  gff_wta, wta_blast2_besthit,
  by.x = "orf_name", by.y = "query_id",
  all.x = TRUE, all.y = FALSE
)

write to disk:

# Export the annotated ORF table as a tab-separated file with a header row,
# without row names and without quoting, then preview it.
write.table(
  x = gff_wta2,
  file = "../figures/orf_predictions/gff_wta2.txt",
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)
head(gff_wta2)

import taxonomic information for all sequences involved in the phylogenies

# Taxonomic information for the homolog sequences: semicolon-separated table
# with a header line.
wta_taxo_info <- read.table(
  file = "../TABLES/wta_protein_homologs.ids_taxid2.txt",
  header = TRUE,
  sep = ";"
)
dim(wta_taxo_info)
## [1] 2132   24
head(wta_taxo_info)

Define family colours

# One colour per family: copy the family as a factor, then relabel its levels
# (in level order) with colours taken from four ColorBrewer palettes
# (12 + 12 + 8 + 6 = 38 colours available).
wta_taxo_info[["family_colour"]] <- as.factor(wta_taxo_info[["family"]])
levels(wta_taxo_info[["family_colour"]]) <- c(
  brewer.pal(n = 12, name = "Set3"),
  brewer.pal(n = 12, name = "Paired"),
  brewer.pal(n = 8, name = "Set2"),
  brewer.pal(n = 6, name = "Dark2")
)

import sequences

# Contig sequences (same FASTA file as the one used for the contig lengths).
seqs_wta <- readBStringSet(
  filepath = "../sequences/wta_final_contigs_with_unassigned.fa"
)

Galbut virus in D. melanogaster (7/7, 99%id)

Define the corresponding contigs :

# Contigs attributed to this virus (Chaq included); no unassigned contigs.
contig_set <- paste0(
  "contig_",
  c(
    9575, 9476, 11373, 7659, 17024, 12188, 18451,
    7863, 9319, 22044, 9859, 10399, 13219
  )
)
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["Galbut_virus_D.mel"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Galbut_virus_D.mel"
)
##                    orf_name        seqid    source type start  end score strand
## 18     contig_10399_1_183_- contig_10399 getorf_JV gene     1  183     .  FALSE
## 19  contig_10399_275_1612_+ contig_10399 getorf_JV gene   275 1612     .   TRUE
## 47   contig_11373_66_1445_- contig_11373 getorf_JV gene    66 1445     .  FALSE
## 67   contig_12188_38_1372_+ contig_12188 getorf_JV gene    38 1372     .   TRUE
## 107 contig_13219_258_1214_+ contig_13219 getorf_JV gene   258 1214     .   TRUE
## 176 contig_17024_122_1174_+ contig_17024 getorf_JV gene   122 1174     .   TRUE
## 224 contig_18451_625_1059_+ contig_18451 getorf_JV gene   625 1059     .   TRUE
## 288  contig_22044_168_914_+ contig_22044 getorf_JV gene   168  914     .   TRUE
## 393 contig_7659_1047_1988_+  contig_7659 getorf_JV gene  1047 1988     .   TRUE
## 394    contig_7659_3_1001_-  contig_7659 getorf_JV gene     3 1001     .  FALSE
## 398    contig_7863_34_345_-  contig_7863 getorf_JV gene    34  345     .  FALSE
## 399  contig_7863_504_1829_+  contig_7863 getorf_JV gene   504 1829     .   TRUE
## 445  contig_9319_139_1473_+  contig_9319 getorf_JV gene   139 1473     .   TRUE
## 457    contig_9476_3_1085_-  contig_9476 getorf_JV gene     3 1085     .  FALSE
## 458  contig_9575_125_1594_+  contig_9575 getorf_JV gene   125 1594     .   TRUE
## 472  contig_9859_476_1492_+  contig_9859 getorf_JV gene   476 1492     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 18      1        183       1621       <NA>       NA               NA         NA
## 19      1       1338       1621 AWY11128.1    0.991              446          4
## 47      1       1380       1525 AWY11085.1    0.958              460         19
## 67      1       1335       1458 AWY11051.1    0.888              438         49
## 107     1        957       1382 AKH40308.1    0.987              319          4
## 176     1       1053       1176 AWY11143.1    0.971              351         10
## 224     1        435       1120 AWY11050.1    0.979              145          3
## 288     1        747       1012 AWY11144.1    0.991              249          2
## 393     1        942       1990 AWY11085.1    0.968              314         10
## 394     1        999       1990 AWY11095.1    0.968              258          8
## 398     1        312       1953 AWY11174.1    0.942              104          6
## 399     1       1326       1953 AWY11166.1    0.975              442         11
## 445     1       1335       1748 AWY11144.1    0.991              445          4
## 457     1       1083       1114 AWY11130.1    0.886              361         41
## 458     1       1470       1604 AWY11049.1    0.953              490         23
## 472     1       1017       1681 AWY11142.1    0.993              293          2
##     gap_opens qstart qend sstart send     evalue bitscore
## 18         NA     NA   NA     NA   NA         NA       NA
## 19          0      1  446     95  540 3.065e-311      948
## 47          0      1  460     36  494 9.548e-303      924
## 67          0      1  438      1  438 2.513e-261      804
## 107         0      1  319      1  319 3.619e-205      634
## 176         0      1  351      1  351 6.869e-232      713
## 224         0      1  145    352  496  1.216e-91      297
## 288         0      1  249    197  445 6.551e-170      529
## 393         0      1  314     36  349 3.472e-207      640
## 394         0     76  333     83  340 9.171e-169      530
## 398         0      1  104    339  442  1.466e-63      214
## 399         0      1  442      1  442 1.245e-281      862
## 445         0      1  445      1  445 1.408e-297      908
## 457         0      1  361      1  361 2.254e-206      640
## 458         0      1  490     51  540  0.000e+00     1005
## 472         0      1  293    240  532 6.874e-202      626
##                                               annotation
## 18                                                  <NA>
## 19  putative RNA-dependent RNA polymerase [Galbut virus]
## 47                                   orf1 [Galbut virus]
## 67                                   orf1 [Galbut virus]
## 107                                    orf1 [Chaq virus]
## 176                                  orf1 [Galbut virus]
## 224                                  orf1 [Galbut virus]
## 288                                  orf1 [Galbut virus]
## 393                                  orf1 [Galbut virus]
## 394                                  orf1 [Galbut virus]
## 398                                  orf1 [Galbut virus]
## 399                                  orf1 [Galbut virus]
## 445                                  orf1 [Galbut virus]
## 457                                  orf1 [Galbut virus]
## 458 putative RNA-dependent RNA polymerase [Galbut virus]
## 472 putative RNA-dependent RNA polymerase [Galbut virus]
## Saving 7 x 5 in image
# Display the first element of the plot_orfs() result (single-bracket indexing
# keeps it wrapped in a one-element list). No text is echoed for it below --
# presumably it is the plot object; TODO confirm.
res[1]
## [[1]]

# Display the second element: the table of ORFs for the selected contigs.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 18     contig_10399_1_183_- contig_10399 getorf_JV gene     1  183     .  FALSE
## 19  contig_10399_275_1612_+ contig_10399 getorf_JV gene   275 1612     .   TRUE
## 47   contig_11373_66_1445_- contig_11373 getorf_JV gene    66 1445     .  FALSE
## 67   contig_12188_38_1372_+ contig_12188 getorf_JV gene    38 1372     .   TRUE
## 107 contig_13219_258_1214_+ contig_13219 getorf_JV gene   258 1214     .   TRUE
## 176 contig_17024_122_1174_+ contig_17024 getorf_JV gene   122 1174     .   TRUE
## 224 contig_18451_625_1059_+ contig_18451 getorf_JV gene   625 1059     .   TRUE
## 288  contig_22044_168_914_+ contig_22044 getorf_JV gene   168  914     .   TRUE
## 393 contig_7659_1047_1988_+  contig_7659 getorf_JV gene  1047 1988     .   TRUE
## 394    contig_7659_3_1001_-  contig_7659 getorf_JV gene     3 1001     .  FALSE
## 398    contig_7863_34_345_-  contig_7863 getorf_JV gene    34  345     .  FALSE
## 399  contig_7863_504_1829_+  contig_7863 getorf_JV gene   504 1829     .   TRUE
## 445  contig_9319_139_1473_+  contig_9319 getorf_JV gene   139 1473     .   TRUE
## 457    contig_9476_3_1085_-  contig_9476 getorf_JV gene     3 1085     .  FALSE
## 458  contig_9575_125_1594_+  contig_9575 getorf_JV gene   125 1594     .   TRUE
## 472  contig_9859_476_1492_+  contig_9859 getorf_JV gene   476 1492     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 18      1        183       1621       <NA>       NA               NA         NA
## 19      1       1338       1621 AWY11128.1    0.991              446          4
## 47      1       1380       1525 AWY11085.1    0.958              460         19
## 67      1       1335       1458 AWY11051.1    0.888              438         49
## 107     1        957       1382 AKH40308.1    0.987              319          4
## 176     1       1053       1176 AWY11143.1    0.971              351         10
## 224     1        435       1120 AWY11050.1    0.979              145          3
## 288     1        747       1012 AWY11144.1    0.991              249          2
## 393     1        942       1990 AWY11085.1    0.968              314         10
## 394     1        999       1990 AWY11095.1    0.968              258          8
## 398     1        312       1953 AWY11174.1    0.942              104          6
## 399     1       1326       1953 AWY11166.1    0.975              442         11
## 445     1       1335       1748 AWY11144.1    0.991              445          4
## 457     1       1083       1114 AWY11130.1    0.886              361         41
## 458     1       1470       1604 AWY11049.1    0.953              490         23
## 472     1       1017       1681 AWY11142.1    0.993              293          2
##     gap_opens qstart qend sstart send     evalue bitscore
## 18         NA     NA   NA     NA   NA         NA       NA
## 19          0      1  446     95  540 3.065e-311      948
## 47          0      1  460     36  494 9.548e-303      924
## 67          0      1  438      1  438 2.513e-261      804
## 107         0      1  319      1  319 3.619e-205      634
## 176         0      1  351      1  351 6.869e-232      713
## 224         0      1  145    352  496  1.216e-91      297
## 288         0      1  249    197  445 6.551e-170      529
## 393         0      1  314     36  349 3.472e-207      640
## 394         0     76  333     83  340 9.171e-169      530
## 398         0      1  104    339  442  1.466e-63      214
## 399         0      1  442      1  442 1.245e-281      862
## 445         0      1  445      1  445 1.408e-297      908
## 457         0      1  361      1  361 2.254e-206      640
## 458         0      1  490     51  540  0.000e+00     1005
## 472         0      1  293    240  532 6.874e-202      626
##                                               annotation
## 18                                                  <NA>
## 19  putative RNA-dependent RNA polymerase [Galbut virus]
## 47                                   orf1 [Galbut virus]
## 67                                   orf1 [Galbut virus]
## 107                                    orf1 [Chaq virus]
## 176                                  orf1 [Galbut virus]
## 224                                  orf1 [Galbut virus]
## 288                                  orf1 [Galbut virus]
## 393                                  orf1 [Galbut virus]
## 394                                  orf1 [Galbut virus]
## 398                                  orf1 [Galbut virus]
## 399                                  orf1 [Galbut virus]
## 445                                  orf1 [Galbut virus]
## 457                                  orf1 [Galbut virus]
## 458 putative RNA-dependent RNA polymerase [Galbut virus]
## 472 putative RNA-dependent RNA polymerase [Galbut virus]

Galbut_virus_D.mel_virus + Chaq virus in D. simulans (4+1, 80-97%id)

Define the corresponding contigs :

# Contigs attributed to Galbut virus in D. simulans (the Chaq virus contig is
# included in the set); there is no unassigned candidate contig here.
contig_set <- paste0("contig_", c(10907, 7968, 10893, 18794, 7817))
contig_set_unassigned <- NA

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["Galbut_virus_D.sim"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Galbut_virus_D.sim"
)
##                    orf_name        seqid    source type start  end score strand
## 24  contig_10893_227_1075_+ contig_10893 getorf_JV gene   227 1075     .   TRUE
## 25   contig_10907_65_1546_- contig_10907 getorf_JV gene    65 1546     .  FALSE
## 227   contig_18794_3_1001_- contig_18794 getorf_JV gene     3 1001     .  FALSE
## 395     contig_7817_3_935_-  contig_7817 getorf_JV gene     3  935     .  FALSE
## 400 contig_7968_1496_1651_-  contig_7968 getorf_JV gene  1496 1651     .  FALSE
## 401 contig_7968_1657_1935_+  contig_7968 getorf_JV gene  1657 1935     .   TRUE
## 402  contig_7968_167_1489_-  contig_7968 getorf_JV gene   167 1489     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 24      1        849       1077 AWY11128.1    0.982              283          5
## 25      1       1482       1566 AWY11085.1    0.963              494         18
## 227     1        999       1108 AWY11172.1    0.975              333          8
## 395     1        933       1137 AKH40308.1    0.863              315         42
## 400     1        156       1937       <NA>       NA               NA         NA
## 401     1        279       1937 AWY11052.1    0.956               93          4
## 402     1       1323       1937 AWY11086.1    0.805              442         86
##     gap_opens qstart qend sstart send     evalue bitscore
## 24          0      1  283    240  522 2.308e-195      604
## 25          0      1  494      1  494  0.000e+00      989
## 227         0      1  333     51  383 3.418e-227      699
## 395         0      1  311      1  315 4.719e-179      559
## 400        NA     NA   NA     NA   NA         NA       NA
## 401         0      1   93     51  143  4.166e-56      192
## 402         0      1  441      1  442 1.332e-234      726
##                                               annotation
## 24  putative RNA-dependent RNA polymerase [Galbut virus]
## 25                                   orf1 [Galbut virus]
## 227 putative RNA-dependent RNA polymerase [Galbut virus]
## 395                                    orf1 [Chaq virus]
## 400                                                 <NA>
## 401 putative RNA-dependent RNA polymerase [Galbut virus]
## 402                                  orf1 [Galbut virus]
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

# Print the second element: the ORF annotation table for these contigs.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 24  contig_10893_227_1075_+ contig_10893 getorf_JV gene   227 1075     .   TRUE
## 25   contig_10907_65_1546_- contig_10907 getorf_JV gene    65 1546     .  FALSE
## 227   contig_18794_3_1001_- contig_18794 getorf_JV gene     3 1001     .  FALSE
## 395     contig_7817_3_935_-  contig_7817 getorf_JV gene     3  935     .  FALSE
## 400 contig_7968_1496_1651_-  contig_7968 getorf_JV gene  1496 1651     .  FALSE
## 401 contig_7968_1657_1935_+  contig_7968 getorf_JV gene  1657 1935     .   TRUE
## 402  contig_7968_167_1489_-  contig_7968 getorf_JV gene   167 1489     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 24      1        849       1077 AWY11128.1    0.982              283          5
## 25      1       1482       1566 AWY11085.1    0.963              494         18
## 227     1        999       1108 AWY11172.1    0.975              333          8
## 395     1        933       1137 AKH40308.1    0.863              315         42
## 400     1        156       1937       <NA>       NA               NA         NA
## 401     1        279       1937 AWY11052.1    0.956               93          4
## 402     1       1323       1937 AWY11086.1    0.805              442         86
##     gap_opens qstart qend sstart send     evalue bitscore
## 24          0      1  283    240  522 2.308e-195      604
## 25          0      1  494      1  494  0.000e+00      989
## 227         0      1  333     51  383 3.418e-227      699
## 395         0      1  311      1  315 4.719e-179      559
## 400        NA     NA   NA     NA   NA         NA       NA
## 401         0      1   93     51  143  4.166e-56      192
## 402         0      1  441      1  442 1.332e-234      726
##                                               annotation
## 24  putative RNA-dependent RNA polymerase [Galbut virus]
## 25                                   orf1 [Galbut virus]
## 227 putative RNA-dependent RNA polymerase [Galbut virus]
## 395                                    orf1 [Chaq virus]
## 400                                                 <NA>
## 401 putative RNA-dependent RNA polymerase [Galbut virus]
## 402                                  orf1 [Galbut virus]

LbTV_Lb

Define the corresponding contigs :

# Single contig attributed to LbTV; no unassigned candidate contig.
contig_set <- c("contig_22597")
contig_set_unassigned <- NA

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["LbTV_Lb"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "LbTV_Lb"
)
##                     orf_name        seqid    source type start  end score
## 293  contig_22597_240_2612_- contig_22597 getorf_JV gene   240 2612     .
## 294 contig_22597_2663_7720_- contig_22597 getorf_JV gene  2663 7720     .
##     strand phase attributes seq_length     subject_id identity alignment_length
## 293  FALSE     1       2373       7844 YP_009072448.1    1.000              791
## 294  FALSE     1       5058       7844 YP_009072447.2    0.997             1686
##     mismatches gap_opens qstart qend sstart send evalue bitscore
## 293          0         0      1  791      1  791      0     1679
## 294          5         0      1 1686      1 1686      0     3425
##                                                              annotation
## 293 RNA-dependent RNA polymerase [Leptopilina boulardi Toti-like virus]
## 294        putative coat protein [Leptopilina boulardi Toti-like virus]
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

Hepe-Virga_L.b_virus mostly in L. boulardi and L. heterotoma (but also in D. phalerata and Trichopria, Pachycrepoideus and D. subobscura)

Define the corresponding contigs :

# Contigs attributed to the Hepe-Virga virus; no unassigned candidate contig.
contig_set <- c("contig_9042", "contig_22560")
contig_set_unassigned <- NA

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["Hepe-Virga_L.b"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Hepe-Virga_L.b"
)
##                     orf_name        seqid    source type start  end score
## 290  contig_22560_207_7349_+ contig_22560 getorf_JV gene   207 7349     .
## 291 contig_22560_7383_8246_+ contig_22560 getorf_JV gene  7383 8246     .
## 430    contig_9042_210_548_+  contig_9042 getorf_JV gene   210  548     .
## 431   contig_9042_538_1146_+  contig_9042 getorf_JV gene   538 1146     .
##     strand phase attributes seq_length subject_id identity alignment_length
## 290   TRUE     1       7143       8247 AWA82269.1    0.361             2379
## 291   TRUE     1        864       8247 AWA82267.1    0.370               54
## 430   TRUE     1        339       1422       <NA>       NA               NA
## 431   TRUE     1        609       1422 AWA82265.1    0.512              162
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 290       1434         0      3 2381    138 2382 0.000e+00     1371
## 291         33         0    184  237    151  204 1.620e-05       51
## 430         NA        NA     NA   NA     NA   NA        NA       NA
## 431         77         0     36  197     33  190 2.049e-46      170
##                                                  annotation
## 290 putative RNA-dependent RNA polymerase [Saiwaicho virus]
## 291                  hypothetical protein [Saiwaicho virus]
## 430                                                    <NA>
## 431                  hypothetical protein [Saiwaicho virus]
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

# Print the second element: the ORF annotation table for these contigs.
res[2]
## [[1]]
##                     orf_name        seqid    source type start  end score
## 290  contig_22560_207_7349_+ contig_22560 getorf_JV gene   207 7349     .
## 291 contig_22560_7383_8246_+ contig_22560 getorf_JV gene  7383 8246     .
## 430    contig_9042_210_548_+  contig_9042 getorf_JV gene   210  548     .
## 431   contig_9042_538_1146_+  contig_9042 getorf_JV gene   538 1146     .
##     strand phase attributes seq_length subject_id identity alignment_length
## 290   TRUE     1       7143       8247 AWA82269.1    0.361             2379
## 291   TRUE     1        864       8247 AWA82267.1    0.370               54
## 430   TRUE     1        339       1422       <NA>       NA               NA
## 431   TRUE     1        609       1422 AWA82265.1    0.512              162
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 290       1434         0      3 2381    138 2382 0.000e+00     1371
## 291         33         0    184  237    151  204 1.620e-05       51
## 430         NA        NA     NA   NA     NA   NA        NA       NA
## 431         77         0     36  197     33  190 2.049e-46      170
##                                                  annotation
## 290 putative RNA-dependent RNA polymerase [Saiwaicho virus]
## 291                  hypothetical protein [Saiwaicho virus]
## 430                                                    <NA>
## 431                  hypothetical protein [Saiwaicho virus]

This virus was first described in Medd et al. 2018 as a Hepe-Virga virus. Similarly to what they found, the polyprotein contains the following domains:

domains

The second ORF from this contig (contig_22560) is homologous to the hypothetical protein AWA82267.1 [Saiwaicho virus].

The largest ORF from contig_9042 shows homology to the hypothetical protein AWA82265.1 which contains a conserved domain (pfam16504 : Putative virion membrane protein of plant and insect virus).

We built a phylogeny based on the RdRp domain only.

# Plot the PhyML tree for the RdRp ORF of contig_22560, save it as a
# 10 x 8 PDF, then display it.
p <- plot_phylogeny(
  file = "../phylogenies/contig_22560_207_7349_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
ggsave(
  filename = "../phylogenies/contig_22560_207_7349_+.pdf",
  plot = p,
  width = 10,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Wuhan_insect_virus_23 in D. subobscura

Define the corresponding contigs :

# Contigs attributed to the Wuhan insect virus 23-like virus, plus one
# candidate contig whose membership is not established.
contig_set <- c("contig_11634", "contig_10471")
contig_set_unassigned <- paste0("contig_", c(11872))

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["Partiti-like5_D.sub"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Partiti-like5_D.sub"
)
##                    orf_name        seqid    source type start  end score strand
## 20  contig_10471_238_1494_- contig_10471 getorf_JV gene   238 1494     .  FALSE
## 60   contig_11634_73_1485_- contig_11634 getorf_JV gene    73 1485     .  FALSE
## 61 contig_11872_1313_1480_- contig_11872 getorf_JV gene  1313 1480     .  FALSE
## 62 contig_11872_1332_1481_+ contig_11872 getorf_JV gene  1332 1481     .   TRUE
## 63  contig_11872_159_1214_- contig_11872 getorf_JV gene   159 1214     .  FALSE
## 64  contig_11872_159_1226_- contig_11872 getorf_JV gene   159 1226     .  FALSE
##    phase attributes seq_length     subject_id identity alignment_length
## 20     1       1257       1613 YP_009329883.1    0.403              393
## 60     1       1413       1503 YP_009329882.1    0.629              470
## 61     2        168       1482           <NA>       NA               NA
## 62     1        150       1482           <NA>       NA               NA
## 63     1       1056       1482           <NA>       NA               NA
## 64     2       1068       1482           <NA>       NA               NA
##    mismatches gap_opens qstart qend sstart send     evalue bitscore
## 20        228         0     26  418     26  408  2.010e-82      285
## 60        173         0      1  470      1  467 1.083e-209      656
## 61         NA        NA     NA   NA     NA   NA         NA       NA
## 62         NA        NA     NA   NA     NA   NA         NA       NA
## 63         NA        NA     NA   NA     NA   NA         NA       NA
## 64         NA        NA     NA   NA     NA   NA         NA       NA
##                                      annotation
## 20 hypothetical protein [Wuhan insect virus 23]
## 60                 RdRp [Wuhan insect virus 23]
## 61                                         <NA>
## 62                                         <NA>
## 63                                         <NA>
## 64                                         <NA>
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

# Print the second element: the ORF annotation table for these contigs.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 20  contig_10471_238_1494_- contig_10471 getorf_JV gene   238 1494     .  FALSE
## 60   contig_11634_73_1485_- contig_11634 getorf_JV gene    73 1485     .  FALSE
## 61 contig_11872_1313_1480_- contig_11872 getorf_JV gene  1313 1480     .  FALSE
## 62 contig_11872_1332_1481_+ contig_11872 getorf_JV gene  1332 1481     .   TRUE
## 63  contig_11872_159_1214_- contig_11872 getorf_JV gene   159 1214     .  FALSE
## 64  contig_11872_159_1226_- contig_11872 getorf_JV gene   159 1226     .  FALSE
##    phase attributes seq_length     subject_id identity alignment_length
## 20     1       1257       1613 YP_009329883.1    0.403              393
## 60     1       1413       1503 YP_009329882.1    0.629              470
## 61     2        168       1482           <NA>       NA               NA
## 62     1        150       1482           <NA>       NA               NA
## 63     1       1056       1482           <NA>       NA               NA
## 64     2       1068       1482           <NA>       NA               NA
##    mismatches gap_opens qstart qend sstart send     evalue bitscore
## 20        228         0     26  418     26  408  2.010e-82      285
## 60        173         0      1  470      1  467 1.083e-209      656
## 61         NA        NA     NA   NA     NA   NA         NA       NA
## 62         NA        NA     NA   NA     NA   NA         NA       NA
## 63         NA        NA     NA   NA     NA   NA         NA       NA
## 64         NA        NA     NA   NA     NA   NA         NA       NA
##                                      annotation
## 20 hypothetical protein [Wuhan insect virus 23]
## 60                 RdRp [Wuhan insect virus 23]
## 61                                         <NA>
## 62                                         <NA>
## 63                                         <NA>
## 64                                         <NA>

Wuhan insect virus 23 is composed of 2 segments (1477bp and 1381bp), very similar to what we found here.

https://www.genome.jp/virushostdb/1923727

We built a phylogeny based on the RdRp domain only.

# Plot the PhyML tree for the RdRp ORF of contig_11634 (minus strand), set
# the x-axis range to 0-11, save it as a 10 x 8 PDF, then display it.
p <- plot_phylogeny(
  file = "../phylogenies/contig_11634_73_1485_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(c(0, 11))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# The PDF is named after the ORF it depicts (contig_11634_73_1485_-), like the
# tree file; the former "_+" suffix referred to a strand with no such ORF.
# NOTE(review): update any downstream reference to the old "_+.pdf" name.
ggsave(
  filename = "../phylogenies/contig_11634_73_1485_-.pdf",
  plot = p,
  width = 10,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

It is unclear whether contig 11872 is part of this genome, but interestingly it shows nucleotide sequence similarity with KP757972.1 (Uncultured virus clone DmelPosVir_36 genomic sequence, obtained from mixed D. ananassae, D. melanogaster, D. malerkotliana, and Scaptodrosophila latifasciaeformis; Webster et al. 2015 Plos Biol.).

Cockroach_mivirus / Imjin_mivirus like in D. immigrans

Define the corresponding contigs :

# Contigs attributed to the chuvirus found in D. immigrans; no unassigned
# candidate contig.
contig_set <- c("contig_2161", "contig_4815", "contig_18031")
contig_set_unassigned <- NA

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["Chuviridae1_D.im"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Chuviridae1_D.im"
)
##                    orf_name        seqid    source type start  end score strand
## 217   contig_18031_1_1071_- contig_18031 getorf_JV gene     1 1071     .  FALSE
## 277    contig_2161_3_5093_-  contig_2161 getorf_JV gene     3 5093     .  FALSE
## 337    contig_4815_21_899_+  contig_4815 getorf_JV gene    21  899     .   TRUE
## 338 contig_4815_2386_2622_+  contig_4815 getorf_JV gene  2386 2622     .   TRUE
## 339  contig_4815_977_2266_+  contig_4815 getorf_JV gene   977 2266     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 217     1       1071       1137 YP_009666257.1    0.325              347
## 277     1       5091       5326 YP_009337089.1    0.370             1647
## 337     1        879       2829 YP_009182178.1    0.387              186
## 338     1        237       2829           <NA>       NA               NA
## 339     1       1290       2829 YP_009337091.1    0.262              400
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 217        232         0      9  355     11  354 1.772e-58      212
## 277       1034         0     43 1684    201 1847 0.000e+00     1094
## 337        113         0      2  187    359  544 2.371e-38      151
## 338         NA        NA     NA   NA     NA   NA        NA       NA
## 339        277         0      7  406      8  384 1.054e-34      144
##                                                     annotation
## 217                   glycoprotein [Wuchang Cockroach Virus 3]
## 277 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
## 337                putative glycoprotein [Imjin River virus 1]
## 338                                                       <NA>
## 339         hypothetical protein [Hubei chuvirus-like virus 3]
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

# Print the second element: the ORF annotation table for these contigs.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 217   contig_18031_1_1071_- contig_18031 getorf_JV gene     1 1071     .  FALSE
## 277    contig_2161_3_5093_-  contig_2161 getorf_JV gene     3 5093     .  FALSE
## 337    contig_4815_21_899_+  contig_4815 getorf_JV gene    21  899     .   TRUE
## 338 contig_4815_2386_2622_+  contig_4815 getorf_JV gene  2386 2622     .   TRUE
## 339  contig_4815_977_2266_+  contig_4815 getorf_JV gene   977 2266     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 217     1       1071       1137 YP_009666257.1    0.325              347
## 277     1       5091       5326 YP_009337089.1    0.370             1647
## 337     1        879       2829 YP_009182178.1    0.387              186
## 338     1        237       2829           <NA>       NA               NA
## 339     1       1290       2829 YP_009337091.1    0.262              400
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 217        232         0      9  355     11  354 1.772e-58      212
## 277       1034         0     43 1684    201 1847 0.000e+00     1094
## 337        113         0      2  187    359  544 2.371e-38      151
## 338         NA        NA     NA   NA     NA   NA        NA       NA
## 339        277         0      7  406      8  384 1.054e-34      144
##                                                     annotation
## 217                   glycoprotein [Wuchang Cockroach Virus 3]
## 277 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
## 337                putative glycoprotein [Imjin River virus 1]
## 338                                                       <NA>
## 339         hypothetical protein [Hubei chuvirus-like virus 3]

Based on RdRp (contig_2161), I built a phylogeny (first a NJ tree was built to subset the sequences used in the final ML phylogeny, using the option “select in alignment” in seaview):

# Plot the PhyML tree for the RdRp ORF of contig_2161, set the x-axis range
# to 0-9.5, save it as a PDF at ggsave()'s default size, then display it.
p <- plot_phylogeny(
  file = "../phylogenies/contig_2161_3_5093_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(c(0, 9.5))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(
  filename = "../phylogenies/contig_2161_-.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

It belongs to the Chuviridae family (-ssRNA; Kafer et al. 2019): “chuviruses were found to appear in linear, circular, and segmented circular forms”. Wuchang cockroach virus 3 is composed of two circular segments (7kb and 4.7kb, see fig. 4 of Li et al. eLife 2015).

Vera, Hubei + Chaq virus ? in D. subobscura / D. obscura

Define the corresponding contigs :

# Contigs attributed to the partiti-like virus of D. subobscura / D. obscura;
# no unassigned candidate contig.
contig_set <- c("contig_9152", "contig_8806", "contig_15880")
contig_set_unassigned <- NA

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["Partiti-like1_D.sub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Partiti-like1_D.sub|obs"
)
##                    orf_name        seqid    source type start  end score strand
## 155    contig_15880_3_848_- contig_15880 getorf_JV gene     3  848     .  FALSE
## 156 contig_15880_974_1216_+ contig_15880 getorf_JV gene   974 1216     .   TRUE
## 424  contig_8806_130_1752_-  contig_8806 getorf_JV gene   130 1752     .  FALSE
## 436  contig_9152_173_1483_-  contig_9152 getorf_JV gene   173 1483     .  FALSE
## 437    contig_9152_43_192_+  contig_9152 getorf_JV gene    43  192     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 155     1        846       1230     AWY11087.1    0.353              245
## 156     1        243       1230           <NA>       NA               NA
## 424     1       1623       1812 YP_009337870.1    0.481              510
## 436     1       1311       1768     QMI58123.1    0.317              347
## 437     1        150       1768           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 155        155         0     36  275     46  290  2.152e-40      156
## 156         NA        NA     NA   NA     NA   NA         NA       NA
## 424        262         0     30  535     16  525 5.879e-155      501
## 436        232         0     56  402     91  430  1.321e-46      180
## 437         NA        NA     NA   NA     NA   NA         NA       NA
##                               annotation
## 155    hypothetical protein [Chaq virus]
## 156                                 <NA>
## 424        RdRp [Hubei diptera virus 17]
## 436 putative capsid protein [Vera virus]
## 437                                 <NA>
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

# Print the second element: the ORF annotation table for these contigs.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 155    contig_15880_3_848_- contig_15880 getorf_JV gene     3  848     .  FALSE
## 156 contig_15880_974_1216_+ contig_15880 getorf_JV gene   974 1216     .   TRUE
## 424  contig_8806_130_1752_-  contig_8806 getorf_JV gene   130 1752     .  FALSE
## 436  contig_9152_173_1483_-  contig_9152 getorf_JV gene   173 1483     .  FALSE
## 437    contig_9152_43_192_+  contig_9152 getorf_JV gene    43  192     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 155     1        846       1230     AWY11087.1    0.353              245
## 156     1        243       1230           <NA>       NA               NA
## 424     1       1623       1812 YP_009337870.1    0.481              510
## 436     1       1311       1768     QMI58123.1    0.317              347
## 437     1        150       1768           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 155        155         0     36  275     46  290  2.152e-40      156
## 156         NA        NA     NA   NA     NA   NA         NA       NA
## 424        262         0     30  535     16  525 5.879e-155      501
## 436        232         0     56  402     91  430  1.321e-46      180
## 437         NA        NA     NA   NA     NA   NA         NA       NA
##                               annotation
## 155    hypothetical protein [Chaq virus]
## 156                                 <NA>
## 424        RdRp [Hubei diptera virus 17]
## 436 putative capsid protein [Vera virus]
## 437                                 <NA>

We built a phylogeny based on RdRp :

# Plot the PhyML tree for the RdRp ORF of contig_8806, set the x-axis range
# to 0-4, save it as a 10 x 8 PDF, then display it.
p <- plot_phylogeny(
  "../phylogenies/contig_8806_130_1752_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(
  filename = "../phylogenies/contig_8806_130_1752_-.pdf",
  plot = p,
  width = 10,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Hubei diptera virus 17 is composed of two segments: 1731bp (coding the RdRp) and 1602bp. Interestingly, the contig 9152 shows similarity with protein YP_009337871.1 encoded by the second segment of Hubei diptera virus 17.

https://www.genome.jp/dbget-bin/www_bget?refseq:NC_033301 https://www.genome.jp/dbget-bin/www_bget?refseq:NC_033302

Vera virus : Two segments; Polymerase originally identified as Partitiviridae-like-2 (KP757929) in Webster et al (2015) PLoS Biology 13(7): e1002210

It is unclear whether “chaq virus” is part of this genome or not.

Circulifer_tenellus_virus_1 -like in Trichopria sp.

Define the corresponding contigs :

# Single contig attributed to the dsRNA virus found in Trichopria sp.; no
# unassigned candidate contig.
contig_set <- c("contig_1434")
contig_set_unassigned <- NA

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["dsRNA_virus1_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "dsRNA_virus1_Tricho"
)
##                    orf_name       seqid    source type start  end score strand
## 128  contig_1434_190_3780_- contig_1434 getorf_JV gene   190 3780     .  FALSE
## 129 contig_1434_3900_6488_- contig_1434 getorf_JV gene  3900 6488     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 128     1       3591       7315 YP_003800003.1    0.357              807
## 129     1       2589       7315 YP_003800000.1    0.240              675
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 128        509         0    158  949    266 1072 2.067e-132      460
## 129        490         0     99  773    375 1020  5.589e-34      147
##                                                             annotation
## 128 RNA-directed RNA polymerase, partial [Circulifer tenellus virus 1]
## 129       proline-alanine-rich protein [Spissistilus festinus virus 1]
## Saving 7 x 5 in image
# Print the first element of the plot_orfs() result, kept as a one-element
# list (presumably the ORF figure -- confirm against plot_orfs()).
res[1]
## [[1]]

# Print the second element: the ORF annotation table for this contig.
res[2]
## [[1]]
##                    orf_name       seqid    source type start  end score strand
## 128  contig_1434_190_3780_- contig_1434 getorf_JV gene   190 3780     .  FALSE
## 129 contig_1434_3900_6488_- contig_1434 getorf_JV gene  3900 6488     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 128     1       3591       7315 YP_003800003.1    0.357              807
## 129     1       2589       7315 YP_003800000.1    0.240              675
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 128        509         0    158  949    266 1072 2.067e-132      460
## 129        490         0     99  773    375 1020  5.589e-34      147
##                                                             annotation
## 128 RNA-directed RNA polymerase, partial [Circulifer tenellus virus 1]
## 129       proline-alanine-rich protein [Spissistilus festinus virus 1]

We built a phylogeny based on RdRp (on a subset of sequences retrieved from ncbi) :

# Plot the PhyML tree for the RdRp ORF of contig_1434, set the x-axis range
# to 0-18, save it as a portrait PDF (8 wide, 10 high), then display it.
p <- plot_phylogeny(
  file = "../phylogenies/contig_1434_190_3780_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 18)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(
  filename = "../phylogenies/contig_1434_190_3780_-.pdf",
  plot = p,
  height = 10,
  width = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Circulifer tenellus virus 1 is known as a non segmented dsRNA virus with an 8086bp genome (https://www.genome.jp/dbget-bin/www_bget?refseq:NC_014360).

Spissistilus festinus virus 1 is also a non segmented dsRNA virus with a 7951bp genome (https://www.genome.jp/dbget-bin/www_bget?refseq:NC_014359).

Persimmon latent virus is also a non segmented dsRNA virus with a 7475bp genome (https://www.genome.jp/dbget-bin/www_bget?refseq:NC_023983)

The contig we got is 7315bp which suggests it is complete or almost complete.

Operophtera brumata reovirus in Trichopria sp.

Define the corresponding contigs :

# Contigs attributed to the reovirus found in Trichopria sp., plus the
# candidate contigs whose membership is not established.
contig_set <- c(
  "contig_3022", "contig_2657", "contig_22938", "contig_7060",
  "contig_22971", "contig_21209", "contig_13029"
)
contig_set_unassigned <- c("contig_7972", "contig_23185", "contig_13079")

# Keep both sets under the virus name for the later fusion of the
# corresponding lines.
virus_list[["Reoviridae2_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 ORF annotation.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae2_Tricho"
)
##                    orf_name        seqid    source type start  end score strand
## 92    contig_13029_3_1022_- contig_13029 getorf_JV gene     3 1022     .  FALSE
## 93    contig_13079_1_1101_+ contig_13079 getorf_JV gene     1 1101     .   TRUE
## 94  contig_13079_256_1101_+ contig_13079 getorf_JV gene   256 1101     .   TRUE
## 267  contig_21209_85_1032_+ contig_21209 getorf_JV gene    85 1032     .   TRUE
## 307  contig_22938_57_3602_+ contig_22938 getorf_JV gene    57 3602     .   TRUE
## 308  contig_22971_24_3410_- contig_22971 getorf_JV gene    24 3410     .  FALSE
## 321   contig_23185_2_1948_+ contig_23185 getorf_JV gene     2 1948     .   TRUE
## 322 contig_23185_404_1948_+ contig_23185 getorf_JV gene   404 1948     .   TRUE
## 323  contig_2657_102_4322_+  contig_2657 getorf_JV gene   102 4322     .   TRUE
## 325  contig_3022_134_3418_-  contig_3022 getorf_JV gene   134 3418     .  FALSE
## 377  contig_7060_186_1949_-  contig_7060 getorf_JV gene   186 1949     .  FALSE
## 403  contig_7972_185_1546_+  contig_7972 getorf_JV gene   185 1546     .   TRUE
## 404    contig_7972_2_1546_+  contig_7972 getorf_JV gene     2 1546     .   TRUE
##     phase attributes seq_length  subject_id identity alignment_length
## 92      1       1020       1027  AWA82242.1    0.285              305
## 93      2       1101       1102        <NA>       NA               NA
## 94      1        846       1102        <NA>       NA               NA
## 267     1        948       1032  AWA82242.1    0.271              316
## 307     1       3546       3604 YP_392502.1    0.291             1075
## 308     1       3387       3427 YP_392503.1    0.238             1061
## 321     2       1947       2148        <NA>       NA               NA
## 322     1       1545       2148        <NA>       NA               NA
## 323     1       4221       4389 YP_392501.1    0.356             1352
## 325     1       3285       3517  AWA82240.1    0.376              458
## 377     1       1764       2107 YP_392506.1    0.247              584
## 403     1       1362       1547        <NA>       NA               NA
## 404     2       1545       1547        <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 92         213         0      1  299     72  376  6.256e-28      121
## 93          NA        NA     NA   NA     NA   NA         NA       NA
## 94          NA        NA     NA   NA     NA   NA         NA       NA
## 267        225         0      5  314    121  436  1.154e-25      114
## 307        757         0    114 1182     88 1162 6.796e-132      458
## 308        775         0     11 1071     86 1103  1.682e-74      280
## 321         NA        NA     NA   NA     NA   NA         NA       NA
## 322         NA        NA     NA   NA     NA   NA         NA       NA
## 323        831         0     20 1371     45 1336 3.086e-222      736
## 325        280         0    634 1091    614 1062  4.031e-80      297
## 377        432         0      2  585     10  584  7.794e-36      151
## 403         NA        NA     NA   NA     NA   NA         NA       NA
## 404         NA        NA     NA   NA     NA   NA         NA       NA
##                                                      annotation
## 92                          hypothetical protein [Eccles virus]
## 93                                                         <NA>
## 94                                                         <NA>
## 267                         hypothetical protein [Eccles virus]
## 307         hypothetical protein [Operophtera brumata reovirus]
## 308         hypothetical protein [Operophtera brumata reovirus]
## 321                                                        <NA>
## 322                                                        <NA>
## 323 RNA-dependent RNA polymerase [Operophtera brumata reovirus]
## 325                         hypothetical protein [Eccles virus]
## 377         hypothetical protein [Operophtera brumata reovirus]
## 403                                                        <NA>
## 404                                                        <NA>
## Saving 7 x 5 in image
# First element of `res`; only the `[[1]]` list header appears in this text
# rendering (presumably the ORF figure -- confirm against plot_orfs)
res[1]
## [[1]]

# Second element of `res`: the ORF table with its best-hit columns, printed below
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 92    contig_13029_3_1022_- contig_13029 getorf_JV gene     3 1022     .  FALSE
## 93    contig_13079_1_1101_+ contig_13079 getorf_JV gene     1 1101     .   TRUE
## 94  contig_13079_256_1101_+ contig_13079 getorf_JV gene   256 1101     .   TRUE
## 267  contig_21209_85_1032_+ contig_21209 getorf_JV gene    85 1032     .   TRUE
## 307  contig_22938_57_3602_+ contig_22938 getorf_JV gene    57 3602     .   TRUE
## 308  contig_22971_24_3410_- contig_22971 getorf_JV gene    24 3410     .  FALSE
## 321   contig_23185_2_1948_+ contig_23185 getorf_JV gene     2 1948     .   TRUE
## 322 contig_23185_404_1948_+ contig_23185 getorf_JV gene   404 1948     .   TRUE
## 323  contig_2657_102_4322_+  contig_2657 getorf_JV gene   102 4322     .   TRUE
## 325  contig_3022_134_3418_-  contig_3022 getorf_JV gene   134 3418     .  FALSE
## 377  contig_7060_186_1949_-  contig_7060 getorf_JV gene   186 1949     .  FALSE
## 403  contig_7972_185_1546_+  contig_7972 getorf_JV gene   185 1546     .   TRUE
## 404    contig_7972_2_1546_+  contig_7972 getorf_JV gene     2 1546     .   TRUE
##     phase attributes seq_length  subject_id identity alignment_length
## 92      1       1020       1027  AWA82242.1    0.285              305
## 93      2       1101       1102        <NA>       NA               NA
## 94      1        846       1102        <NA>       NA               NA
## 267     1        948       1032  AWA82242.1    0.271              316
## 307     1       3546       3604 YP_392502.1    0.291             1075
## 308     1       3387       3427 YP_392503.1    0.238             1061
## 321     2       1947       2148        <NA>       NA               NA
## 322     1       1545       2148        <NA>       NA               NA
## 323     1       4221       4389 YP_392501.1    0.356             1352
## 325     1       3285       3517  AWA82240.1    0.376              458
## 377     1       1764       2107 YP_392506.1    0.247              584
## 403     1       1362       1547        <NA>       NA               NA
## 404     2       1545       1547        <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 92         213         0      1  299     72  376  6.256e-28      121
## 93          NA        NA     NA   NA     NA   NA         NA       NA
## 94          NA        NA     NA   NA     NA   NA         NA       NA
## 267        225         0      5  314    121  436  1.154e-25      114
## 307        757         0    114 1182     88 1162 6.796e-132      458
## 308        775         0     11 1071     86 1103  1.682e-74      280
## 321         NA        NA     NA   NA     NA   NA         NA       NA
## 322         NA        NA     NA   NA     NA   NA         NA       NA
## 323        831         0     20 1371     45 1336 3.086e-222      736
## 325        280         0    634 1091    614 1062  4.031e-80      297
## 377        432         0      2  585     10  584  7.794e-36      151
## 403         NA        NA     NA   NA     NA   NA         NA       NA
## 404         NA        NA     NA   NA     NA   NA         NA       NA
##                                                      annotation
## 92                          hypothetical protein [Eccles virus]
## 93                                                         <NA>
## 94                                                         <NA>
## 267                         hypothetical protein [Eccles virus]
## 307         hypothetical protein [Operophtera brumata reovirus]
## 308         hypothetical protein [Operophtera brumata reovirus]
## 321                                                        <NA>
## 322                                                        <NA>
## 323 RNA-dependent RNA polymerase [Operophtera brumata reovirus]
## 325                         hypothetical protein [Eccles virus]
## 377         hypothetical protein [Operophtera brumata reovirus]
## 403                                                        <NA>
## 404                                                        <NA>
# Tree for the RdRp ORF of contig_2657, with the x-axis limits set to 0-9
# (this replaces the x scale already present on the plot, see message below)
p <- plot_phylogeny(
  "../phylogenies/contig_2657_102_4322_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 9)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report
ggsave(plot = p, filename = "../phylogenies/contig_2657_102_4322_+.pdf")
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Four “unknown” contigs show a perfect association with the other 7 contigs belonging to an apparent reovirus. contig_7972 does not display any similarity with known sequences; however, it contains a nice single ORF. contig_23185 shows similarity with VP5 from Zoersel tick virus (QYV43123.1) based on a blastx on nr (evalue 2e-24, 23% identity). contig_9440 also shows very weak similarity with Zoersel tick virus (VP7, QYV43125.1) based on a blastx on nr (evalue 3e-7, 24% identity). Zoersel tick virus is an unclassified member of the Reoviridae. contig_13079 has no sequence similarity with any public sequence.

Rice dwarf virus / Homalodisca vitripennis reovirus in D. subobscura/obscura

Define the corresponding contigs :

# Contigs grouped under the Rice dwarf virus / Homalodisca vitripennis
# reovirus-like virus
contig_set <- sprintf("contig_%d", c(21878L, 4624L, 5982L, 2830L))
# Contigs tracked separately as "unassigned" for this virus
contig_set_unassigned <- sprintf(
  "contig_%d",
  c(14848L, 15668L, 16918L, 3406L, 14764L, 9139L, 19603L, 15083L)
)

# Keep both sets under the virus name for the later fusion of corresponding lines
virus_list$`Reoviridae1_D.sub|obs` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)


# Draw/annotate the ORFs of both contig sets from the gff_wta2 annotation
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae1_D.sub|obs"
)
##                    orf_name        seqid    source type start  end score strand
## 134  contig_14764_63_1277_- contig_14764 getorf_JV gene    63 1277     .  FALSE
## 135  contig_14764_63_1283_- contig_14764 getorf_JV gene    63 1283     .  FALSE
## 136 contig_14848_255_1109_+ contig_14848 getorf_JV gene   255 1109     .   TRUE
## 137 contig_14848_366_1109_+ contig_14848 getorf_JV gene   366 1109     .   TRUE
## 139   contig_15083_2_1267_+ contig_15083 getorf_JV gene     2 1267     .   TRUE
## 140  contig_15083_56_1267_+ contig_15083 getorf_JV gene    56 1267     .   TRUE
## 146 contig_15668_125_1219_- contig_15668 getorf_JV gene   125 1219     .  FALSE
## 147 contig_15668_125_1240_- contig_15668 getorf_JV gene   125 1240     .  FALSE
## 174  contig_16918_11_1147_+ contig_16918 getorf_JV gene    11 1147     .   TRUE
## 175   contig_16918_5_1147_+ contig_16918 getorf_JV gene     5 1147     .   TRUE
## 235    contig_19603_1_957_+ contig_19603 getorf_JV gene     1  957     .   TRUE
## 236   contig_19603_85_957_+ contig_19603 getorf_JV gene    85  957     .   TRUE
## 285  contig_21878_68_1015_+ contig_21878 getorf_JV gene    68 1015     .   TRUE
## 324   contig_2830_14_4147_+  contig_2830 getorf_JV gene    14 4147     .   TRUE
## 330    contig_3406_3_3689_-  contig_3406 getorf_JV gene     3 3689     .  FALSE
## 331    contig_3406_3_3692_-  contig_3406 getorf_JV gene     3 3692     .  FALSE
## 334   contig_4624_11_2908_-  contig_4624 getorf_JV gene    11 2908     .  FALSE
## 357    contig_5982_2_1774_-  contig_5982 getorf_JV gene     2 1774     .  FALSE
## 434  contig_9139_104_1768_+  contig_9139 getorf_JV gene   104 1768     .   TRUE
## 435  contig_9139_113_1768_+  contig_9139 getorf_JV gene   113 1768     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 134     1       1215       1285           <NA>       NA               NA
## 135     2       1221       1285           <NA>       NA               NA
## 136     2        855       1281           <NA>       NA               NA
## 137     1        744       1281           <NA>       NA               NA
## 139     2       1266       1269           <NA>       NA               NA
## 140     1       1212       1269           <NA>       NA               NA
## 146     1       1095       1240           <NA>       NA               NA
## 147     2       1116       1240           <NA>       NA               NA
## 174     1       1137       1182           <NA>       NA               NA
## 175     2       1143       1182           <NA>       NA               NA
## 235     2        957       1082           <NA>       NA               NA
## 236     1        873       1082           <NA>       NA               NA
## 285     1        948       1016 YP_001111373.1    0.311              299
## 324     1       4134       4308 YP_001111373.1    0.308             1362
## 330     1       3687       3699           <NA>       NA               NA
## 331     2       3690       3699           <NA>       NA               NA
## 334     1       2898       2915    NP_620543.1    0.201              951
## 357     1       1773       1794 YP_002790888.1    0.328              582
## 434     2       1665       1770           <NA>       NA               NA
## 435     1       1656       1770           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 134         NA        NA     NA   NA     NA   NA         NA       NA
## 135         NA        NA     NA   NA     NA   NA         NA       NA
## 136         NA        NA     NA   NA     NA   NA         NA       NA
## 137         NA        NA     NA   NA     NA   NA         NA       NA
## 139         NA        NA     NA   NA     NA   NA         NA       NA
## 140         NA        NA     NA   NA     NA   NA         NA       NA
## 146         NA        NA     NA   NA     NA   NA         NA       NA
## 147         NA        NA     NA   NA     NA   NA         NA       NA
## 174         NA        NA     NA   NA     NA   NA         NA       NA
## 175         NA        NA     NA   NA     NA   NA         NA       NA
## 235         NA        NA     NA   NA     NA   NA         NA       NA
## 236         NA        NA     NA   NA     NA   NA         NA       NA
## 285        198         0     15  302    367  665  4.158e-37      148
## 324        905         0     24 1332     25 1386 7.764e-189      635
## 330         NA        NA     NA   NA     NA   NA         NA       NA
## 331         NA        NA     NA   NA     NA   NA         NA       NA
## 334        755         0     12  962     16  961  1.370e-24      117
## 357        388         0     13  590    210  791  3.305e-90      315
## 434         NA        NA     NA   NA     NA   NA         NA       NA
## 435         NA        NA     NA   NA     NA   NA         NA       NA
##                                                        annotation
## 134                                                          <NA>
## 135                                                          <NA>
## 136                                                          <NA>
## 137                                                          <NA>
## 139                                                          <NA>
## 140                                                          <NA>
## 146                                                          <NA>
## 147                                                          <NA>
## 174                                                          <NA>
## 175                                                          <NA>
## 235                                                          <NA>
## 236                                                          <NA>
## 285 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 324 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 330                                                          <NA>
## 331                                                          <NA>
## 334                         major core protein [Rice dwarf virus]
## 357         minor core protein [Homalodisca vitripennis reovirus]
## 434                                                          <NA>
## 435                                                          <NA>
## Saving 7 x 5 in image
# First element of `res`; only the `[[1]]` list header appears in this text
# rendering (presumably the ORF figure -- confirm against plot_orfs)
res[1]
## [[1]]

# Second element of `res`: the ORF table with its best-hit columns, printed below
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 134  contig_14764_63_1277_- contig_14764 getorf_JV gene    63 1277     .  FALSE
## 135  contig_14764_63_1283_- contig_14764 getorf_JV gene    63 1283     .  FALSE
## 136 contig_14848_255_1109_+ contig_14848 getorf_JV gene   255 1109     .   TRUE
## 137 contig_14848_366_1109_+ contig_14848 getorf_JV gene   366 1109     .   TRUE
## 139   contig_15083_2_1267_+ contig_15083 getorf_JV gene     2 1267     .   TRUE
## 140  contig_15083_56_1267_+ contig_15083 getorf_JV gene    56 1267     .   TRUE
## 146 contig_15668_125_1219_- contig_15668 getorf_JV gene   125 1219     .  FALSE
## 147 contig_15668_125_1240_- contig_15668 getorf_JV gene   125 1240     .  FALSE
## 174  contig_16918_11_1147_+ contig_16918 getorf_JV gene    11 1147     .   TRUE
## 175   contig_16918_5_1147_+ contig_16918 getorf_JV gene     5 1147     .   TRUE
## 235    contig_19603_1_957_+ contig_19603 getorf_JV gene     1  957     .   TRUE
## 236   contig_19603_85_957_+ contig_19603 getorf_JV gene    85  957     .   TRUE
## 285  contig_21878_68_1015_+ contig_21878 getorf_JV gene    68 1015     .   TRUE
## 324   contig_2830_14_4147_+  contig_2830 getorf_JV gene    14 4147     .   TRUE
## 330    contig_3406_3_3689_-  contig_3406 getorf_JV gene     3 3689     .  FALSE
## 331    contig_3406_3_3692_-  contig_3406 getorf_JV gene     3 3692     .  FALSE
## 334   contig_4624_11_2908_-  contig_4624 getorf_JV gene    11 2908     .  FALSE
## 357    contig_5982_2_1774_-  contig_5982 getorf_JV gene     2 1774     .  FALSE
## 434  contig_9139_104_1768_+  contig_9139 getorf_JV gene   104 1768     .   TRUE
## 435  contig_9139_113_1768_+  contig_9139 getorf_JV gene   113 1768     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 134     1       1215       1285           <NA>       NA               NA
## 135     2       1221       1285           <NA>       NA               NA
## 136     2        855       1281           <NA>       NA               NA
## 137     1        744       1281           <NA>       NA               NA
## 139     2       1266       1269           <NA>       NA               NA
## 140     1       1212       1269           <NA>       NA               NA
## 146     1       1095       1240           <NA>       NA               NA
## 147     2       1116       1240           <NA>       NA               NA
## 174     1       1137       1182           <NA>       NA               NA
## 175     2       1143       1182           <NA>       NA               NA
## 235     2        957       1082           <NA>       NA               NA
## 236     1        873       1082           <NA>       NA               NA
## 285     1        948       1016 YP_001111373.1    0.311              299
## 324     1       4134       4308 YP_001111373.1    0.308             1362
## 330     1       3687       3699           <NA>       NA               NA
## 331     2       3690       3699           <NA>       NA               NA
## 334     1       2898       2915    NP_620543.1    0.201              951
## 357     1       1773       1794 YP_002790888.1    0.328              582
## 434     2       1665       1770           <NA>       NA               NA
## 435     1       1656       1770           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 134         NA        NA     NA   NA     NA   NA         NA       NA
## 135         NA        NA     NA   NA     NA   NA         NA       NA
## 136         NA        NA     NA   NA     NA   NA         NA       NA
## 137         NA        NA     NA   NA     NA   NA         NA       NA
## 139         NA        NA     NA   NA     NA   NA         NA       NA
## 140         NA        NA     NA   NA     NA   NA         NA       NA
## 146         NA        NA     NA   NA     NA   NA         NA       NA
## 147         NA        NA     NA   NA     NA   NA         NA       NA
## 174         NA        NA     NA   NA     NA   NA         NA       NA
## 175         NA        NA     NA   NA     NA   NA         NA       NA
## 235         NA        NA     NA   NA     NA   NA         NA       NA
## 236         NA        NA     NA   NA     NA   NA         NA       NA
## 285        198         0     15  302    367  665  4.158e-37      148
## 324        905         0     24 1332     25 1386 7.764e-189      635
## 330         NA        NA     NA   NA     NA   NA         NA       NA
## 331         NA        NA     NA   NA     NA   NA         NA       NA
## 334        755         0     12  962     16  961  1.370e-24      117
## 357        388         0     13  590    210  791  3.305e-90      315
## 434         NA        NA     NA   NA     NA   NA         NA       NA
## 435         NA        NA     NA   NA     NA   NA         NA       NA
##                                                        annotation
## 134                                                          <NA>
## 135                                                          <NA>
## 136                                                          <NA>
## 137                                                          <NA>
## 139                                                          <NA>
## 140                                                          <NA>
## 146                                                          <NA>
## 147                                                          <NA>
## 174                                                          <NA>
## 175                                                          <NA>
## 235                                                          <NA>
## 236                                                          <NA>
## 285 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 324 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 330                                                          <NA>
## 331                                                          <NA>
## 334                         major core protein [Rice dwarf virus]
## 357         minor core protein [Homalodisca vitripennis reovirus]
## 434                                                          <NA>
## 435                                                          <NA>
# Tree for the RdRp ORF of contig_2830, with the x-axis limits set to 0-4
# (this replaces the x scale already present on the plot, see message below)
p <- plot_phylogeny(
  "../phylogenies/contig_2830_14_4147_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report
ggsave(plot = p, filename = "../phylogenies/contig_2830_14_4147_+.pdf")
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

The RdRp encoded by contig_2830_14_4147_+ seems to be complete (it aligns from positions 25 to 1402 with the RdRp of Rice gall dwarf virus [ABF67520.1], which is 1458 aa long). Surprisingly, however, contig_21878_68_1015_+ also matches the same RdRp [Rice gall dwarf virus, ABF67520.1], but only partially and within the region already covered by the previous ORF (367-665). In fact, contig_21878_68_1015_+ is completely nested in, and almost 100% identical to, contig_2830_14_4147_+ at the nucleotide level. Maybe a subgenomic segment?

“Unknown contigs”: contig_14848 has no significant homology with public db (blastx on nr); contig_15668 has no significant homology with public db (blastx on nr); contig_16918 has no significant homology with public db (blastx on nr); contig_3406 has hits with a reovirus minor outer capsid protein [Thrips tabaci associated reovirus 1] (evalue 2e-7, 19% identity); contig_14764 has no significant homology with public db (blastx on nr); contig_9139 has no significant homology with public db (blastx on nr); contig_19603 has no significant homology with public db (blastx on nr); contig_15083 has no significant homology with public db (blastx on nr).

However, they all show a nice ORF. It looks like a complete reovirus with 12 segments. See file reovirus_D.sub_unassigned.blastx.

Larkfield virus

PS: contig_16274 was initially annotated as Larkfield (100% identity) based on our first mmseqs2 blastx equivalent. Larkfield virus is described in Medd et al. 2018 (from D. suz). It is expected to be a totivirus. However, other parts of the contig do show sequence similarities with partitiviruses (typically composed of two segments). Anyway, it is unclear whether this contig belongs to the reovirus genome. Probably not, in fact.

Define the corresponding contigs :

# Contigs grouped under Larkfield virus
contig_set <- c("contig_22700", "contig_16274")
# No "unassigned" contig is tracked for this virus
contig_set_unassigned <- NA
# Keep both sets under the virus name for the later fusion of corresponding lines
virus_list$`Larkfield_D.sub|obs` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)


# Draw/annotate the ORFs of the contig set from the gff_wta2 annotation
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Larkfield_D.sub|obs"
)
##                     orf_name        seqid    source type start  end score
## 164     contig_16274_2_151_+ contig_16274 getorf_JV gene     2  151     .
## 165  contig_16274_221_1039_- contig_16274 getorf_JV gene   221 1039     .
## 295 contig_22700_3291_3449_- contig_22700 getorf_JV gene  3291 3449     .
## 296 contig_22700_3448_5490_+ contig_22700 getorf_JV gene  3448 5490     .
## 297   contig_22700_86_3157_+ contig_22700 getorf_JV gene    86 3157     .
##     strand phase attributes seq_length subject_id identity alignment_length
## 164   TRUE     1        150       1211       <NA>       NA               NA
## 165  FALSE     1        819       1211       <NA>       NA               NA
## 295  FALSE     1        159       5915       <NA>       NA               NA
## 296   TRUE     1       2043       5915 AWA82248.1    1.000              681
## 297   TRUE     1       3072       5915 AWA82249.1    0.894             1024
##     mismatches gap_opens qstart qend sstart send evalue bitscore
## 164         NA        NA     NA   NA     NA   NA     NA       NA
## 165         NA        NA     NA   NA     NA   NA     NA       NA
## 295         NA        NA     NA   NA     NA   NA     NA       NA
## 296          0         0      1  681    203  883      0     1416
## 297         97         0      1 1024      1  919      0     1789
##                                                  annotation
## 164                                                    <NA>
## 165                                                    <NA>
## 295                                                    <NA>
## 296 putative RNA-dependent RNA polymerase [Larkfield virus]
## 297                  hypothetical protein [Larkfield virus]
## Saving 7 x 5 in image
# First element of `res`; only the `[[1]]` list header appears in this text
# rendering (presumably the ORF figure -- confirm against plot_orfs)
res[1]
## [[1]]

# Second element of `res`: the ORF table with its best-hit columns, printed below
res[2]
## [[1]]
##                     orf_name        seqid    source type start  end score
## 164     contig_16274_2_151_+ contig_16274 getorf_JV gene     2  151     .
## 165  contig_16274_221_1039_- contig_16274 getorf_JV gene   221 1039     .
## 295 contig_22700_3291_3449_- contig_22700 getorf_JV gene  3291 3449     .
## 296 contig_22700_3448_5490_+ contig_22700 getorf_JV gene  3448 5490     .
## 297   contig_22700_86_3157_+ contig_22700 getorf_JV gene    86 3157     .
##     strand phase attributes seq_length subject_id identity alignment_length
## 164   TRUE     1        150       1211       <NA>       NA               NA
## 165  FALSE     1        819       1211       <NA>       NA               NA
## 295  FALSE     1        159       5915       <NA>       NA               NA
## 296   TRUE     1       2043       5915 AWA82248.1    1.000              681
## 297   TRUE     1       3072       5915 AWA82249.1    0.894             1024
##     mismatches gap_opens qstart qend sstart send evalue bitscore
## 164         NA        NA     NA   NA     NA   NA     NA       NA
## 165         NA        NA     NA   NA     NA   NA     NA       NA
## 295         NA        NA     NA   NA     NA   NA     NA       NA
## 296          0         0      1  681    203  883      0     1416
## 297         97         0      1 1024      1  919      0     1789
##                                                  annotation
## 164                                                    <NA>
## 165                                                    <NA>
## 295                                                    <NA>
## 296 putative RNA-dependent RNA polymerase [Larkfield virus]
## 297                  hypothetical protein [Larkfield virus]

Hubei mivirus-like in Trichopria sp.

Define the corresponding contigs :

# Contigs grouped under the Hubei mivirus-like virus
contig_set <- paste0("contig_", c(5374, 12656, 11296))
# No "unassigned" contig is tracked for this virus
contig_set_unassigned <- NA
# Keep both sets under the virus name for the later fusion of corresponding lines
virus_list$Chuviridae3_Tricho <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)


# NOTE(review): the `name` given here ("Mivirus_tricho") differs from the
# virus_list key above ("Chuviridae3_Tricho"), whereas the other sections use
# the same label for both -- confirm whether this is intentional.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Mivirus_tricho"
)
##                    orf_name        seqid    source type start  end score strand
## 41  contig_11296_262_1518_- contig_11296 getorf_JV gene   262 1518     .  FALSE
## 78   contig_12656_58_1335_+ contig_12656 getorf_JV gene    58 1335     .   TRUE
## 346 contig_5374_1884_2501_+  contig_5374 getorf_JV gene  1884 2501     .   TRUE
## 347  contig_5374_287_1540_+  contig_5374 getorf_JV gene   287 1540     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 41      1       1257       1532 YP_009337906.1    0.309              366
## 78      1       1278       1421 YP_009666257.1    0.340              377
## 346     1        618       2604           <NA>       NA               NA
## 347     1       1254       2604 YP_009337906.1    0.304              366
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 41         252         0     10  375     37  401 6.158e-53      198
## 78         247         0      6  382    231  605 1.259e-62      227
## 346         NA        NA     NA   NA     NA   NA        NA       NA
## 347        253         0      9  374     37  401 6.940e-54      201
##                                             annotation
## 41  hypothetical protein [Hubei chuvirus-like virus 1]
## 78            glycoprotein [Wuchang Cockroach Virus 3]
## 346                                               <NA>
## 347 hypothetical protein [Hubei chuvirus-like virus 1]
## Saving 7 x 5 in image
# First element of `res`; only the `[[1]]` list header appears in this text
# rendering (presumably the ORF figure -- confirm against plot_orfs)
res[1]
## [[1]]

Contig 5374 and 11296 are 95% identical at the protein level: may correspond to two strains segregating in Trichopria sp.

# Second element of `res`: the ORF table with its best-hit columns, printed below
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 41  contig_11296_262_1518_- contig_11296 getorf_JV gene   262 1518     .  FALSE
## 78   contig_12656_58_1335_+ contig_12656 getorf_JV gene    58 1335     .   TRUE
## 346 contig_5374_1884_2501_+  contig_5374 getorf_JV gene  1884 2501     .   TRUE
## 347  contig_5374_287_1540_+  contig_5374 getorf_JV gene   287 1540     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 41      1       1257       1532 YP_009337906.1    0.309              366
## 78      1       1278       1421 YP_009666257.1    0.340              377
## 346     1        618       2604           <NA>       NA               NA
## 347     1       1254       2604 YP_009337906.1    0.304              366
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 41         252         0     10  375     37  401 6.158e-53      198
## 78         247         0      6  382    231  605 1.259e-62      227
## 346         NA        NA     NA   NA     NA   NA        NA       NA
## 347        253         0      9  374     37  401 6.940e-54      201
##                                             annotation
## 41  hypothetical protein [Hubei chuvirus-like virus 1]
## 78            glycoprotein [Wuchang Cockroach Virus 3]
## 346                                               <NA>
## 347 hypothetical protein [Hubei chuvirus-like virus 1]

Miviruses belong to the Chuviridae family and have either one or two segments, typically encoding an L protein, a G protein, an N protein and a VP.

Contig 11296 encodes a homolog of a nucleoprotein. Contig 12656 encodes a homolog of a glycoprotein. Contig 5374 (287-1540) encodes a homolog of a nucleoprotein. Contig 5374 (1884-2501) encodes a “hypothetical protein”.

No trace of RdRp… However, contigs 5374 and 11296 are almost 100% identical at the nucleotide level with:

https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=2792591

See Kafer et al. 2019.

Ganda-orthophasmavirus-like in Trichopria sp.

Define the corresponding contigs :

# Contigs grouped under the Ganda orthophasmavirus-like virus
contig_set <- c(
  "contig_6541", "contig_10992", "contig_10108",
  "contig_10707", "contig_9260", "contig_17877"
)
# No "unassigned" contig is tracked for this virus
contig_set_unassigned <- NA
# Keep both sets under the virus name for the later fusion of corresponding lines
virus_list$Phasmaviridae_Tricho <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)


# Draw/annotate the ORFs of the contig set from the gff_wta2 annotation
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Phasmaviridae_Tricho"
)
##                    orf_name        seqid    source type start  end score strand
## 3    contig_10108_65_1651_+ contig_10108 getorf_JV gene    65 1651     .   TRUE
## 23  contig_10707_197_1585_+ contig_10707 getorf_JV gene   197 1585     .   TRUE
## 29    contig_10992_1_1515_- contig_10992 getorf_JV gene     1 1515     .  FALSE
## 213   contig_17877_24_971_+ contig_17877 getorf_JV gene    24  971     .   TRUE
## 368   contig_6541_128_289_-  contig_6541 getorf_JV gene   128  289     .  FALSE
## 369 contig_6541_1813_1992_-  contig_6541 getorf_JV gene  1813 1992     .  FALSE
## 370 contig_6541_2008_2172_+  contig_6541 getorf_JV gene  2008 2172     .   TRUE
## 371  contig_6541_340_1782_+  contig_6541 getorf_JV gene   340 1782     .   TRUE
## 444    contig_9260_3_1454_-  contig_9260 getorf_JV gene     3 1454     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 3       1       1587       1651 YP_009666981.1    0.584              529
## 23      1       1389       1587 YP_009666981.1    0.411              462
## 29      1       1515       1559 YP_009666981.1    0.363              446
## 213     1        948       1143 YP_009666981.1    0.324              293
## 368     1        162       2229           <NA>       NA               NA
## 369     1        180       2229           <NA>       NA               NA
## 370     1        165       2229           <NA>       NA               NA
## 371     1       1443       2229 YP_009666983.1    0.392              283
## 444     1       1452       1755 YP_009666982.1    0.321              474
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 3          219         0      1  529    541 1068 3.500e-199      629
## 23         269         0      1  462   1179 1636 6.546e-113      376
## 29         282         0     57  499     57  502  5.136e-79      278
## 213        193         0      1  293   1651 1937  1.024e-38      153
## 368         NA        NA     NA   NA     NA   NA         NA       NA
## 369         NA        NA     NA   NA     NA   NA         NA       NA
## 370         NA        NA     NA   NA     NA   NA         NA       NA
## 371        167         0     12  294     11  285  1.841e-56      211
## 444        318         0      9  482    132  601  4.895e-91      313
##                                         annotation
## 3   RNA-dependent RNA polymerase [Ganda bee virus]
## 23  RNA-dependent RNA polymerase [Ganda bee virus]
## 29  RNA-dependent RNA polymerase [Ganda bee virus]
## 213 RNA-dependent RNA polymerase [Ganda bee virus]
## 368                                           <NA>
## 369                                           <NA>
## 370                                           <NA>
## 371                nucleoprotein [Ganda bee virus]
## 444       glycoprotein precursor [Ganda bee virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 3    contig_10108_65_1651_+ contig_10108 getorf_JV gene    65 1651     .   TRUE
## 23  contig_10707_197_1585_+ contig_10707 getorf_JV gene   197 1585     .   TRUE
## 29    contig_10992_1_1515_- contig_10992 getorf_JV gene     1 1515     .  FALSE
## 213   contig_17877_24_971_+ contig_17877 getorf_JV gene    24  971     .   TRUE
## 368   contig_6541_128_289_-  contig_6541 getorf_JV gene   128  289     .  FALSE
## 369 contig_6541_1813_1992_-  contig_6541 getorf_JV gene  1813 1992     .  FALSE
## 370 contig_6541_2008_2172_+  contig_6541 getorf_JV gene  2008 2172     .   TRUE
## 371  contig_6541_340_1782_+  contig_6541 getorf_JV gene   340 1782     .   TRUE
## 444    contig_9260_3_1454_-  contig_9260 getorf_JV gene     3 1454     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 3       1       1587       1651 YP_009666981.1    0.584              529
## 23      1       1389       1587 YP_009666981.1    0.411              462
## 29      1       1515       1559 YP_009666981.1    0.363              446
## 213     1        948       1143 YP_009666981.1    0.324              293
## 368     1        162       2229           <NA>       NA               NA
## 369     1        180       2229           <NA>       NA               NA
## 370     1        165       2229           <NA>       NA               NA
## 371     1       1443       2229 YP_009666983.1    0.392              283
## 444     1       1452       1755 YP_009666982.1    0.321              474
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 3          219         0      1  529    541 1068 3.500e-199      629
## 23         269         0      1  462   1179 1636 6.546e-113      376
## 29         282         0     57  499     57  502  5.136e-79      278
## 213        193         0      1  293   1651 1937  1.024e-38      153
## 368         NA        NA     NA   NA     NA   NA         NA       NA
## 369         NA        NA     NA   NA     NA   NA         NA       NA
## 370         NA        NA     NA   NA     NA   NA         NA       NA
## 371        167         0     12  294     11  285  1.841e-56      211
## 444        318         0      9  482    132  601  4.895e-91      313
##                                         annotation
## 3   RNA-dependent RNA polymerase [Ganda bee virus]
## 23  RNA-dependent RNA polymerase [Ganda bee virus]
## 29  RNA-dependent RNA polymerase [Ganda bee virus]
## 213 RNA-dependent RNA polymerase [Ganda bee virus]
## 368                                           <NA>
## 369                                           <NA>
## 370                                           <NA>
## 371                nucleoprotein [Ganda bee virus]
## 444       glycoprotein precursor [Ganda bee virus]

Surprisingly, 4 of the contigs encode an RdRp, suggesting either that several viruses are present or that the assembly is incomplete. After looking at the blast results, it is clear that the assembly is incomplete, leading to a fragmented RdRp. We artificially fused the 4 parts (order is: 10992, 10108, 10707 and 17877), which together cover the majority of the related protein.

# Phylogeny read from the PhyML tree of the fused sequence and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_10992_1_1515_-_FUSED+with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Limit the x axis to [0, 3]; this replaces the x scale already on the plot.
p <- p + xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save as PDF; no width/height given, so the default size is used.
ggsave(
  filename = "../phylogenies/contig_10992_1_1515_-_FUSED.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Ganda bee virus genome is composed of three segments: 6453bp for the RdRp coding segment, 2101bp for the glycoprotein precursor (GnGc) gene and 1906bp for the nucleoprotein (N) gene. See fig 3 of Schoonvaere et al. Plos one 2016.

https://www.genome.jp/dbget-bin/www_bget?refseq:NC_043642 https://www.genome.jp/dbget-bin/www_bget?refseq:NC_043643 https://www.genome.jp/dbget-bin/www_bget?refseq:NC_043644

It seems the genome is almost complete, apart from the fact that the RdRp is scattered among 4 contigs.

Contig 9260 encodes the Glycoprotein (M segment), and Contig 6541 encodes the nucleoprotein and other unannotated ORFs (S segment).

Reovirus in Asobara sp.

Define the corresponding contigs :

# Contigs grouped under this reovirus; no unassigned contig is attached to it.
contig_set <- c(
  "contig_17370", "contig_17755", "contig_6072", "contig_6134",
  "contig_6311", "contig_17655", "contig_11550"
)
# store for later fusion of corresponding lines
virus_list[["Reoviridae4_A.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = NA
)

# Export the nucleotide sequences of the selected contigs.
writeXStringSet(
  contigs_wta[contig_set],
  "../sequences/RNA_virus_genomes/Reoviridae4_A.sp.fa"
)
# writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")

# This section never assigns `contig_set_unassigned`, so pass NA explicitly
# instead of relying on whatever value an earlier section left in the
# global environment.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Reovirus Asobara"
)
##                    orf_name        seqid    source type start  end score strand
## 56     contig_11550_2_160_+ contig_11550 getorf_JV gene     2  160     .   TRUE
## 57    contig_11550_53_532_- contig_11550 getorf_JV gene    53  532     .  FALSE
## 58  contig_11550_538_1464_- contig_11550 getorf_JV gene   538 1464     .  FALSE
## 59  contig_11550_538_1509_- contig_11550 getorf_JV gene   538 1509     .  FALSE
## 192   contig_17370_1_1053_- contig_17370 getorf_JV gene     1 1053     .  FALSE
## 193   contig_17370_1_1161_- contig_17370 getorf_JV gene     1 1161     .  FALSE
## 202  contig_17655_43_1140_- contig_17655 getorf_JV gene    43 1140     .  FALSE
## 203  contig_17655_43_1149_- contig_17655 getorf_JV gene    43 1149     .  FALSE
## 206   contig_17755_3_1031_+ contig_17755 getorf_JV gene     3 1031     .   TRUE
## 207   contig_17755_3_1031_+ contig_17755 getorf_JV gene     3 1031     .   TRUE
## 362    contig_6072_2_2275_-  contig_6072 getorf_JV gene     2 2275     .  FALSE
## 363    contig_6072_2_2359_-  contig_6072 getorf_JV gene     2 2359     .  FALSE
## 364   contig_6134_46_2247_-  contig_6134 getorf_JV gene    46 2247     .  FALSE
## 365    contig_6311_2_2083_-  contig_6311 getorf_JV gene     2 2083     .  FALSE
## 366    contig_6311_2_2287_-  contig_6311 getorf_JV gene     2 2287     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 56      2        159       1511           <NA>       NA               NA
## 57      1        480       1511           <NA>       NA               NA
## 58      1        927       1511           <NA>       NA               NA
## 59      2        972       1511           <NA>       NA               NA
## 192     1       1053       1162           <NA>       NA               NA
## 193     2       1161       1162           <NA>       NA               NA
## 202     1       1098       1151           <NA>       NA               NA
## 203     2       1107       1151           <NA>       NA               NA
## 206     2       1029       1148           <NA>       NA               NA
## 207     1       1029       1148           <NA>       NA               NA
## 362     1       2274       2359           <NA>       NA               NA
## 363     2       2358       2359           <NA>       NA               NA
## 364     1       2202       2341 YP_009158901.1    0.233              655
## 365     1       2082       2289           <NA>       NA               NA
## 366     2       2286       2289           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 56          NA        NA     NA   NA     NA   NA        NA       NA
## 57          NA        NA     NA   NA     NA   NA        NA       NA
## 58          NA        NA     NA   NA     NA   NA        NA       NA
## 59          NA        NA     NA   NA     NA   NA        NA       NA
## 192         NA        NA     NA   NA     NA   NA        NA       NA
## 193         NA        NA     NA   NA     NA   NA        NA       NA
## 202         NA        NA     NA   NA     NA   NA        NA       NA
## 203         NA        NA     NA   NA     NA   NA        NA       NA
## 206         NA        NA     NA   NA     NA   NA        NA       NA
## 207         NA        NA     NA   NA     NA   NA        NA       NA
## 362         NA        NA     NA   NA     NA   NA        NA       NA
## 363         NA        NA     NA   NA     NA   NA        NA       NA
## 364        480         0     17  671    613 1239 5.188e-32      140
## 365         NA        NA     NA   NA     NA   NA        NA       NA
## 366         NA        NA     NA   NA     NA   NA        NA       NA
##                                            annotation
## 56                                               <NA>
## 57                                               <NA>
## 58                                               <NA>
## 59                                               <NA>
## 192                                              <NA>
## 193                                              <NA>
## 202                                              <NA>
## 203                                              <NA>
## 206                                              <NA>
## 207                                              <NA>
## 362                                              <NA>
## 363                                              <NA>
## 364 RNA-dependent RNA polymerase [Chobar Gorge virus]
## 365                                              <NA>
## 366                                              <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 56     contig_11550_2_160_+ contig_11550 getorf_JV gene     2  160     .   TRUE
## 57    contig_11550_53_532_- contig_11550 getorf_JV gene    53  532     .  FALSE
## 58  contig_11550_538_1464_- contig_11550 getorf_JV gene   538 1464     .  FALSE
## 59  contig_11550_538_1509_- contig_11550 getorf_JV gene   538 1509     .  FALSE
## 192   contig_17370_1_1053_- contig_17370 getorf_JV gene     1 1053     .  FALSE
## 193   contig_17370_1_1161_- contig_17370 getorf_JV gene     1 1161     .  FALSE
## 202  contig_17655_43_1140_- contig_17655 getorf_JV gene    43 1140     .  FALSE
## 203  contig_17655_43_1149_- contig_17655 getorf_JV gene    43 1149     .  FALSE
## 206   contig_17755_3_1031_+ contig_17755 getorf_JV gene     3 1031     .   TRUE
## 207   contig_17755_3_1031_+ contig_17755 getorf_JV gene     3 1031     .   TRUE
## 362    contig_6072_2_2275_-  contig_6072 getorf_JV gene     2 2275     .  FALSE
## 363    contig_6072_2_2359_-  contig_6072 getorf_JV gene     2 2359     .  FALSE
## 364   contig_6134_46_2247_-  contig_6134 getorf_JV gene    46 2247     .  FALSE
## 365    contig_6311_2_2083_-  contig_6311 getorf_JV gene     2 2083     .  FALSE
## 366    contig_6311_2_2287_-  contig_6311 getorf_JV gene     2 2287     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 56      2        159       1511           <NA>       NA               NA
## 57      1        480       1511           <NA>       NA               NA
## 58      1        927       1511           <NA>       NA               NA
## 59      2        972       1511           <NA>       NA               NA
## 192     1       1053       1162           <NA>       NA               NA
## 193     2       1161       1162           <NA>       NA               NA
## 202     1       1098       1151           <NA>       NA               NA
## 203     2       1107       1151           <NA>       NA               NA
## 206     2       1029       1148           <NA>       NA               NA
## 207     1       1029       1148           <NA>       NA               NA
## 362     1       2274       2359           <NA>       NA               NA
## 363     2       2358       2359           <NA>       NA               NA
## 364     1       2202       2341 YP_009158901.1    0.233              655
## 365     1       2082       2289           <NA>       NA               NA
## 366     2       2286       2289           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 56          NA        NA     NA   NA     NA   NA        NA       NA
## 57          NA        NA     NA   NA     NA   NA        NA       NA
## 58          NA        NA     NA   NA     NA   NA        NA       NA
## 59          NA        NA     NA   NA     NA   NA        NA       NA
## 192         NA        NA     NA   NA     NA   NA        NA       NA
## 193         NA        NA     NA   NA     NA   NA        NA       NA
## 202         NA        NA     NA   NA     NA   NA        NA       NA
## 203         NA        NA     NA   NA     NA   NA        NA       NA
## 206         NA        NA     NA   NA     NA   NA        NA       NA
## 207         NA        NA     NA   NA     NA   NA        NA       NA
## 362         NA        NA     NA   NA     NA   NA        NA       NA
## 363         NA        NA     NA   NA     NA   NA        NA       NA
## 364        480         0     17  671    613 1239 5.188e-32      140
## 365         NA        NA     NA   NA     NA   NA        NA       NA
## 366         NA        NA     NA   NA     NA   NA        NA       NA
##                                            annotation
## 56                                               <NA>
## 57                                               <NA>
## 58                                               <NA>
## 59                                               <NA>
## 192                                              <NA>
## 193                                              <NA>
## 202                                              <NA>
## 203                                              <NA>
## 206                                              <NA>
## 207                                              <NA>
## 362                                              <NA>
## 363                                              <NA>
## 364 RNA-dependent RNA polymerase [Chobar Gorge virus]
## 365                                              <NA>
## 366                                              <NA>

These sequences were grouped together after a later blastx analysis (27/07/2022). The 7 sequences are related to orbi-like viruses (Reoviridae).

contig 6134 is incomplete with 734 aa where the closest relative has 1284 aa. Apparently a reoviridae.

# Phylogeny read from the PhyML tree of the contig_6134 ORF and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_6134_46_2247_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Limit the x axis to [0, 10]; this replaces the x scale already on the plot.
p <- p + xlim(0, 10)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save as PDF; no width/height given, so the default size is used.
ggsave(
  filename = "../phylogenies/contig_6134_46_2247_-.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

I built a phylogeny for contig17370 [Serbia reo-like virus 1] using the sequences from ncbi. Interestingly it is also related to [Hubei odonate virus 15]:

# Phylogeny read from the PhyML tree built for contig_17370.
p <- plot_phylogeny(
  "../phylogenies/contig_17370-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Limit the x axis to [0, 5]; this replaces the x scale already on the plot.
p <- p + xlim(0, 5)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save as PDF; no width/height given, so the default size is used.
ggsave(
  filename = "../phylogenies/contig_17370.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

see ../TABLES/Reoviridae4_A.sp_blastx_2022_07_27.txt

for details

All reoviruses together

# Phylogeny read from the PhyML tree gathering all the reovirus sequences.
p <- plot_phylogeny(
  "../phylogenies/Reoviridae_all_nr2-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Limit the x axis to [0, 24]; this replaces the x scale already on the plot.
p <- p + xlim(0, 24)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save as PDF with an explicit 8 x 10 page.
ggsave(
  filename = "../phylogenies/contig_All_reoviruses_non_redundant.pdf",
  plot = p,
  width = 8,
  height = 10
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Chuvirus in Asobara sp.

Define the corresponding contigs :

# Single contig attributed to this virus; no unassigned contig.
contig_set <- "contig_13728"
# store for later fusion of corresponding lines
virus_list[["Chuviridae2_A.sp"]] <- list(
  contig_set = contig_set[1],
  contig_set_unassigned = NA
)

# Export the nucleotide sequence of the selected contig.
writeXStringSet(
  contigs_wta[contig_set],
  "../sequences/RNA_virus_genomes/Chuviridae2_A.sp.fa"
)
# writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")

# Keep the plot_orfs() result in `res` so its elements can be displayed.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Chuvirus Asobara"
)
##                   orf_name        seqid    source type start  end score strand
## 118 contig_13728_25_1296_+ contig_13728 getorf_JV gene    25 1296     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 118     1       1272       1346 YP_009337904.1    0.232              408
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 118        313         0      2  409   1757 2164 5.413e-21      102
##                                                     annotation
## 118 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]
## Saving 7 x 5 in image
res[1]
## [[1]]

The RdRp is incomplete for sure (13728): only 424 aa where the closest relative has 2172 aa. Apparently a Chuviridae.

# Phylogeny read from the PhyML tree of the contig_13728 ORF and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_13728_25_1296_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Save as PDF; no width/height given, so the default size is used.
ggsave(
  filename = "../phylogenies/contig_13728_25_1296__+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Partitivirus in Asobara sp.

Define the corresponding contigs :

# Contigs attributed to this virus; no unassigned contig.
contig_set <- c(
  "contig_14948",
  "contig_15126"
)
# store for later fusion of corresponding lines
virus_list[["Partiti-like2_A.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = NA
)

# Export the nucleotide sequences of the selected contigs.
writeXStringSet(
  contigs_wta[contig_set],
  "../sequences/RNA_virus_genomes/Partiti-like2_A.sp.fa"
)
# writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")

# Keep the plot_orfs() result in `res` so its elements can be displayed.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Partiti Asobara"
)
##                   orf_name        seqid    source type start  end score strand
## 138  contig_14948_2_1144_- contig_14948 getorf_JV gene     2 1144     .  FALSE
## 141 contig_15126_55_1263_- contig_15126 getorf_JV gene    55 1263     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 138     1       1143       1276    YP_052856.2    0.285              308
## 141     1       1209       1267 YP_009329869.1    0.467              371
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 138        214         0     73  372    143  450  7.122e-22      104
## 141        197         0     14  383     81  451 3.725e-115      379
##                                                          annotation
## 138 RNA-dependent RNA polymerase [Penicillium stoloniferum virus S]
## 141                                 RdRp [Beihai barnacle virus 13]
## Saving 7 x 5 in image
res[1]
## [[1]]

Contig 14948 has 381 aa and the closest relative has 539 aa. Apparently a Partitiviridae.

# Phylogeny read from the PhyML tree of the contig_14948 ORF and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_14948_2_1144_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Save as PDF; no width/height given, so the default size is used.
ggsave(
  filename = "../phylogenies/contig_14948_2_1144_-.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Nidovirus in Asobara sp.

Define the corresponding contigs :

# Single contig attributed to this virus; no unassigned contig.
contig_set <- "contig_14619"
# store for later fusion of corresponding lines
virus_list[["Nidoviridae_A.sp"]] <- list(
  contig_set = contig_set[1],
  contig_set_unassigned = NA
)

# Export the nucleotide sequence of the selected contig.
writeXStringSet(
  contigs_wta[contig_set],
  "../sequences/RNA_virus_genomes/Nidoviridae_A.sp.fa"
)
# writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")

# Keep the plot_orfs() result in `res` so its elements can be displayed.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Nido Asobara"
)
##                    orf_name        seqid    source type start  end score strand
## 133 contig_14619_145_1134_- contig_14619 getorf_JV gene   145 1134     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 133     1        990       1293 AWA82244.1    0.676               34         11
##     gap_opens qstart qend sstart send    evalue bitscore
## 133         0    269  302     41   74 5.124e-05       50
##                               annotation
## 133 hypothetical protein [Fuefuki virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

*Fuefuki-like virus: described by Medd et al. in D. suz. Nidoviridae: non-segmented, ~16 kb, +ssRNA.

Dark2_A.sp in Asobara sp.

# Only unassigned contigs for this entry: there is no assigned contig set.
contig_set <- NA
contig_set_unassigned <- c(
  "contig_10171",
  "contig_16255"
)
# store for later fusion of corresponding lines
virus_list[["Dark2_A.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Keep the plot_orfs() result in `res` so its elements can be displayed.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Dark2_A.sp_Asobara"
)
##                    orf_name        seqid    source type start  end score strand
## 4     contig_10171_3_1598_+ contig_10171 getorf_JV gene     3 1598     .   TRUE
## 5    contig_10171_57_1598_+ contig_10171 getorf_JV gene    57 1598     .   TRUE
## 161    contig_16255_2_832_+ contig_16255 getorf_JV gene     2  832     .   TRUE
## 162  contig_16255_218_832_+ contig_16255 getorf_JV gene   218  832     .   TRUE
## 163 contig_16255_999_1211_- contig_16255 getorf_JV gene   999 1211     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 4       2       1596       1644       <NA>       NA               NA         NA
## 5       1       1542       1644       <NA>       NA               NA         NA
## 161     2        831       1212       <NA>       NA               NA         NA
## 162     1        615       1212       <NA>       NA               NA         NA
## 163     2        213       1212       <NA>       NA               NA         NA
##     gap_opens qstart qend sstart send evalue bitscore annotation
## 4          NA     NA   NA     NA   NA     NA       NA       <NA>
## 5          NA     NA   NA     NA   NA     NA       NA       <NA>
## 161        NA     NA   NA     NA   NA     NA       NA       <NA>
## 162        NA     NA   NA     NA   NA     NA       NA       <NA>
## 163        NA     NA   NA     NA   NA     NA       NA       <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]

Pow Burn virus in D. sub/obs

Define the corresponding contigs :

# Contigs attributed to this virus; no unassigned contig is attached to it.
contig_set <- c(
  "contig_22592",
  "contig_10041",
  "contig_21669",
  "contig_17996"
)
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["Powburn_Dsub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Keep the plot_orfs() result in `res` so its elements can be displayed.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Powburn_Dsub|obs"
)
##                    orf_name        seqid    source type start  end score strand
## 2     contig_10041_2_1402_- contig_10041 getorf_JV gene     2 1402     .  FALSE
## 216 contig_17996_142_1068_- contig_17996 getorf_JV gene   142 1068     .  FALSE
## 280    contig_21669_3_524_- contig_21669 getorf_JV gene     3  524     .  FALSE
## 281  contig_21669_768_992_+ contig_21669 getorf_JV gene   768  992     .   TRUE
## 292 contig_22592_353_7750_- contig_22592 getorf_JV gene   353 7750     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 2       1       1401       1659 AMO03227.1    0.948              467         24
## 216     1        927       1139 AKH40285.1    1.000              196          0
## 280     1        522       1021 AMO03227.1    0.959              174          7
## 281     1        225       1021       <NA>       NA               NA         NA
## 292     1       7398       7911 AMO03227.1    0.951             2466        121
##     gap_opens qstart qend sstart send     evalue bitscore
## 2           0      1  467      1  467 4.418e-295      902
## 216         0      1  196   2653 2848 3.828e-120      389
## 280         0      1  174    137  310 2.524e-111      355
## 281        NA     NA   NA     NA   NA         NA       NA
## 292         0      1 2466    399 2860  0.000e+00     4779
##                                annotation
## 2   putative polyprotein [Pow Burn virus]
## 216    putative polyprotein [Thika virus]
## 280 putative polyprotein [Pow Burn virus]
## 281                                  <NA>
## 292 putative polyprotein [Pow Burn virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

Contig 21669 is 901% identical with 10041 at the nucleotidic level. Maybe a subgenomic fragment?

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 2     contig_10041_2_1402_- contig_10041 getorf_JV gene     2 1402     .  FALSE
## 216 contig_17996_142_1068_- contig_17996 getorf_JV gene   142 1068     .  FALSE
## 280    contig_21669_3_524_- contig_21669 getorf_JV gene     3  524     .  FALSE
## 281  contig_21669_768_992_+ contig_21669 getorf_JV gene   768  992     .   TRUE
## 292 contig_22592_353_7750_- contig_22592 getorf_JV gene   353 7750     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 2       1       1401       1659 AMO03227.1    0.948              467         24
## 216     1        927       1139 AKH40285.1    1.000              196          0
## 280     1        522       1021 AMO03227.1    0.959              174          7
## 281     1        225       1021       <NA>       NA               NA         NA
## 292     1       7398       7911 AMO03227.1    0.951             2466        121
##     gap_opens qstart qend sstart send     evalue bitscore
## 2           0      1  467      1  467 4.418e-295      902
## 216         0      1  196   2653 2848 3.828e-120      389
## 280         0      1  174    137  310 2.524e-111      355
## 281        NA     NA   NA     NA   NA         NA       NA
## 292         0      1 2466    399 2860  0.000e+00     4779
##                                annotation
## 2   putative polyprotein [Pow Burn virus]
## 216    putative polyprotein [Thika virus]
## 280 putative polyprotein [Pow Burn virus]
## 281                                  <NA>
## 292 putative polyprotein [Pow Burn virus]
# Gene-arrow map with one facet per contig: arrows span start..end, point
# according to `strand`, use `phase` for the line type and carry the
# annotation as label.
# NOTE(review): `tab` is not assigned in this section, and the warning printed
# just below reports 29 removed rows although only 5 ORFs are listed for this
# virus -- presumably a table left over from another step; confirm which table
# is intended here.
ggplot(
  tab,
  aes(
    xmin = start,
    xmax = end,
    y = seqid,
    forward = strand,
    label = annotation
  )
) +
  geom_gene_arrow(aes(lty = phase)) +
  facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
  geom_gene_label(align = "centre") +
  theme_genes() +
  # White segment drawn from the contig length up to x = 100000 on each row
  # (presumably to hide the track beyond the end of the contig -- confirm).
  geom_segment(
    aes(y = seqid, yend = seqid, x = seq_length),
    xend = 100000,
    colour = "white",
    size = 2
  )
## Warning: Removed 29 rows containing missing values (`geom_gene_label()`).

Pow Burn virus has a 9,268 bp genomic sequence and has been detected in Dsub, Dobs, Dsus and Sdef according to Obbard's table. Our assembly appears to be fragmented.

Thika virus also has a 9 kb genome:

https://www.genome.jp/dbget-bin/www_bget?refseq:NC_027127

The polyprotein encoded by contig_22592 contains the following domains :

Domains for contig_22592

Based on RdRp domain, we built the following phylogeny:

# Phylogeny read from the PhyML tree of the contig_22592 ORF and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_22592_353_7750_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Limit the x axis to [0, 15]; this replaces the x scale already on the plot.
p <- p + xlim(0, 15)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save as PDF; no width/height given, so the default size is used.
ggsave(
  filename = "../phylogenies/contig_22592_353_7750.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Cumuto goukovirus -like in Pachycrepoideus

Define the corresponding contigs :

# Single contig attributed to this virus; no unassigned contig.
contig_set <- "contig_17880"
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["Phenuiviridae_Pachy"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Keep the plot_orfs() result in `res` so its elements can be displayed.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Phenuiviridae_Pachy"
)
##                 orf_name        seqid    source type start end score strand
## 214 contig_17880_3_983_- contig_17880 getorf_JV gene     3 983     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 214     1        981       1143 YP_009664616.1    0.329              333
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 214        219         0      1  327    166  498 1.434e-52      194
##                        annotation
## 214 glycoprotein G [Cumuto virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                 orf_name        seqid    source type start end score strand
## 214 contig_17880_3_983_- contig_17880 getorf_JV gene     3 983     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 214     1        981       1143 YP_009664616.1    0.329              333
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 214        219         0      1  327    166  498 1.434e-52      194
##                        annotation
## 214 glycoprotein G [Cumuto virus]

Goukoviruses are expected to have 3 segments (1.1kb, 6.4kb and 3.2kb). They infect insects. https://viralzone.expasy.org/7102

The protein is most likely incomplete (327 aa versus ~1000 aa for related sequences). Nevertheless, we built the following phylogeny:

# Tree for the contig_17880 glycoprotein ORF (PhyML output), drawn with the
# project helper plot_phylogeny(); the x axis is then reset to 0-10.
p <- plot_phylogeny(
  "../phylogenies/contig_17880_3_983_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 10)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report.
ggsave("../phylogenies/contig_17880_3_983.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Cimodo virus in Leptopilina sp.

Define the corresponding contigs :

# Contigs with a Cimodo virus hit.
contig_set <- paste0(
  "contig_",
  c(21672, 14400, 6047, 22214, 5748, 22929, 3046, 3854, 8099, 13150)
)
# Contigs without homology that are nevertheless attached to this virus.
contig_set_unassigned <- paste0("contig_", c(17745, 19572))
# store for later fusion of corresponding lines
virus_list$Reoviridae3_L.sp <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is a project helper; judging by the output below it prints the
# ORF table for these contigs and saves a figure. Its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae3_L.sp"
)
##                    orf_name        seqid    source type start  end score strand
## 96   contig_13150_22_1296_- contig_13150 getorf_JV gene    22 1296     .  FALSE
## 130 contig_14400_288_1076_+ contig_14400 getorf_JV gene   288 1076     .   TRUE
## 204  contig_17745_53_1114_+ contig_17745 getorf_JV gene    53 1114     .   TRUE
## 205  contig_17745_74_1114_+ contig_17745 getorf_JV gene    74 1114     .   TRUE
## 233 contig_19572_109_1065_+ contig_19572 getorf_JV gene   109 1065     .   TRUE
## 234 contig_19572_157_1065_+ contig_19572 getorf_JV gene   157 1065     .   TRUE
## 282    contig_21672_1_723_- contig_21672 getorf_JV gene     1  723     .  FALSE
## 289  contig_22214_228_890_+ contig_22214 getorf_JV gene   228  890     .   TRUE
## 305  contig_22929_193_351_+ contig_22929 getorf_JV gene   193  351     .   TRUE
## 306 contig_22929_630_3638_+ contig_22929 getorf_JV gene   630 3638     .   TRUE
## 326   contig_3046_17_3883_-  contig_3046 getorf_JV gene    17 3883     .  FALSE
## 333  contig_3854_122_3259_+  contig_3854 getorf_JV gene   122 3259     .   TRUE
## 355 contig_5748_2246_2413_+  contig_5748 getorf_JV gene  2246 2413     .   TRUE
## 356   contig_5748_44_2227_+  contig_5748 getorf_JV gene    44 2227     .   TRUE
## 359  contig_6047_255_2162_+  contig_6047 getorf_JV gene   255 2162     .   TRUE
## 405 contig_8099_1260_1814_-  contig_8099 getorf_JV gene  1260 1814     .  FALSE
## 406   contig_8099_78_1205_-  contig_8099 getorf_JV gene    78 1205     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 96      1       1275       1387 YP_009059068.1    0.250              415
## 130     1        789       1307 YP_009059071.1    0.340              233
## 204     2       1062       1148           <NA>       NA               NA
## 205     1       1041       1148           <NA>       NA               NA
## 233     2        957       1083           <NA>       NA               NA
## 234     1        909       1083           <NA>       NA               NA
## 282     1        723       1021 YP_009059071.1    0.329              229
## 289     1        663       1005 YP_009059077.1    0.408              224
## 305     1        159       3742           <NA>       NA               NA
## 306     1       3009       3742 YP_009059073.1    0.403             1003
## 326     1       3867       4024 YP_009072449.1    0.470             1269
## 333     1       3138       3344 YP_009059074.1    0.322             1048
## 355     1        168       2466           <NA>       NA               NA
## 356     1       2184       2466 YP_009059076.1    0.264              673
## 359     1       1908       2178 YP_009059075.1    0.501              631
## 405     1        555       1917           <NA>       NA               NA
## 406     1       1128       1917 YP_009059067.1    0.253              304
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 96         295         0     18  411     79  493  3.846e-32      136
## 130        149         0     20  245      5  237  1.592e-26      115
## 204         NA        NA     NA   NA     NA   NA         NA       NA
## 205         NA        NA     NA   NA     NA   NA         NA       NA
## 233         NA        NA     NA   NA     NA   NA         NA       NA
## 234         NA        NA     NA   NA     NA   NA         NA       NA
## 282        148         0     20  241      5  233  2.174e-24      108
## 289        128         0      1  218    350  573  1.191e-43      163
## 305         NA        NA     NA   NA     NA   NA         NA       NA
## 306        596         0      2 1001    196 1198 6.704e-261      832
## 326        671         0     17 1285     66 1332  0.000e+00     1154
## 333        702         0      4 1040      4 1051 8.497e-170      567
## 355         NA        NA     NA   NA     NA   NA         NA       NA
## 356        472         0     87  728     39  711  6.071e-53      207
## 359        314         0      1  631     99  728 9.422e-182      584
## 405         NA        NA     NA   NA     NA   NA         NA       NA
## 406        225         0     73  376    279  580  5.766e-19       95
##                                      annotation
## 96          hypothetical protein [Cimodo virus]
## 130         hypothetical protein [Cimodo virus]
## 204                                        <NA>
## 205                                        <NA>
## 233                                        <NA>
## 234                                        <NA>
## 282         hypothetical protein [Cimodo virus]
## 289   NTP-binding domain protein [Cimodo virus]
## 305                                        <NA>
## 306         hypothetical protein [Cimodo virus]
## 326 RNA-dependent RNA polymerase [Cimodo virus]
## 333         hypothetical protein [Cimodo virus]
## 355                                        <NA>
## 356         hypothetical protein [Cimodo virus]
## 359         hypothetical protein [Cimodo virus]
## 405                                        <NA>
## 406         hypothetical protein [Cimodo virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 96   contig_13150_22_1296_- contig_13150 getorf_JV gene    22 1296     .  FALSE
## 130 contig_14400_288_1076_+ contig_14400 getorf_JV gene   288 1076     .   TRUE
## 204  contig_17745_53_1114_+ contig_17745 getorf_JV gene    53 1114     .   TRUE
## 205  contig_17745_74_1114_+ contig_17745 getorf_JV gene    74 1114     .   TRUE
## 233 contig_19572_109_1065_+ contig_19572 getorf_JV gene   109 1065     .   TRUE
## 234 contig_19572_157_1065_+ contig_19572 getorf_JV gene   157 1065     .   TRUE
## 282    contig_21672_1_723_- contig_21672 getorf_JV gene     1  723     .  FALSE
## 289  contig_22214_228_890_+ contig_22214 getorf_JV gene   228  890     .   TRUE
## 305  contig_22929_193_351_+ contig_22929 getorf_JV gene   193  351     .   TRUE
## 306 contig_22929_630_3638_+ contig_22929 getorf_JV gene   630 3638     .   TRUE
## 326   contig_3046_17_3883_-  contig_3046 getorf_JV gene    17 3883     .  FALSE
## 333  contig_3854_122_3259_+  contig_3854 getorf_JV gene   122 3259     .   TRUE
## 355 contig_5748_2246_2413_+  contig_5748 getorf_JV gene  2246 2413     .   TRUE
## 356   contig_5748_44_2227_+  contig_5748 getorf_JV gene    44 2227     .   TRUE
## 359  contig_6047_255_2162_+  contig_6047 getorf_JV gene   255 2162     .   TRUE
## 405 contig_8099_1260_1814_-  contig_8099 getorf_JV gene  1260 1814     .  FALSE
## 406   contig_8099_78_1205_-  contig_8099 getorf_JV gene    78 1205     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 96      1       1275       1387 YP_009059068.1    0.250              415
## 130     1        789       1307 YP_009059071.1    0.340              233
## 204     2       1062       1148           <NA>       NA               NA
## 205     1       1041       1148           <NA>       NA               NA
## 233     2        957       1083           <NA>       NA               NA
## 234     1        909       1083           <NA>       NA               NA
## 282     1        723       1021 YP_009059071.1    0.329              229
## 289     1        663       1005 YP_009059077.1    0.408              224
## 305     1        159       3742           <NA>       NA               NA
## 306     1       3009       3742 YP_009059073.1    0.403             1003
## 326     1       3867       4024 YP_009072449.1    0.470             1269
## 333     1       3138       3344 YP_009059074.1    0.322             1048
## 355     1        168       2466           <NA>       NA               NA
## 356     1       2184       2466 YP_009059076.1    0.264              673
## 359     1       1908       2178 YP_009059075.1    0.501              631
## 405     1        555       1917           <NA>       NA               NA
## 406     1       1128       1917 YP_009059067.1    0.253              304
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 96         295         0     18  411     79  493  3.846e-32      136
## 130        149         0     20  245      5  237  1.592e-26      115
## 204         NA        NA     NA   NA     NA   NA         NA       NA
## 205         NA        NA     NA   NA     NA   NA         NA       NA
## 233         NA        NA     NA   NA     NA   NA         NA       NA
## 234         NA        NA     NA   NA     NA   NA         NA       NA
## 282        148         0     20  241      5  233  2.174e-24      108
## 289        128         0      1  218    350  573  1.191e-43      163
## 305         NA        NA     NA   NA     NA   NA         NA       NA
## 306        596         0      2 1001    196 1198 6.704e-261      832
## 326        671         0     17 1285     66 1332  0.000e+00     1154
## 333        702         0      4 1040      4 1051 8.497e-170      567
## 355         NA        NA     NA   NA     NA   NA         NA       NA
## 356        472         0     87  728     39  711  6.071e-53      207
## 359        314         0      1  631     99  728 9.422e-182      584
## 405         NA        NA     NA   NA     NA   NA         NA       NA
## 406        225         0     73  376    279  580  5.766e-19       95
##                                      annotation
## 96          hypothetical protein [Cimodo virus]
## 130         hypothetical protein [Cimodo virus]
## 204                                        <NA>
## 205                                        <NA>
## 233                                        <NA>
## 234                                        <NA>
## 282         hypothetical protein [Cimodo virus]
## 289   NTP-binding domain protein [Cimodo virus]
## 305                                        <NA>
## 306         hypothetical protein [Cimodo virus]
## 326 RNA-dependent RNA polymerase [Cimodo virus]
## 333         hypothetical protein [Cimodo virus]
## 355                                        <NA>
## 356         hypothetical protein [Cimodo virus]
## 359         hypothetical protein [Cimodo virus]
## 405                                        <NA>
## 406         hypothetical protein [Cimodo virus]

Cimodo virus has a 12-segmented genome https://www.genome.jp/virushostdb/1427476

Two contigs without homology are included as they co-occur with the other 10 segments (17745 and 18322; note that they are 93% identical at the nucleotide level). Each of them contains a single ORF.

We built a phylogeny based on putative RdRp:

# Tree for the putative RdRp of contig_3046 (PhyML output), drawn with the
# project helper plot_phylogeny(); the x axis is then reset to 0-8.
p <- plot_phylogeny(
  "../phylogenies/contig_3046_17_3883_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 8)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report.
ggsave("../phylogenies/contig_3046.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Bloomfield virus-like in Leptopilina sp., Pachy and D. kuntzei.

It is impossible to ascribe the unassigned contigs to either Bloomfield or Cimodo virus.

Define the corresponding contigs :

# Contigs attributed to the Bloomfield-like virus.
contig_set <- paste0("contig_", c(8808, 4942, 8318, 8787, 4957))
# No unassigned contigs are attached here; the candidate list is kept
# commented out below.
contig_set_unassigned <- NA
#contig_set_unassigned=paste0("contig_", c( 20236, 7428,3260, 18191, 10916, 7503, 16163))
# store for later fusion of corresponding lines
virus_list$Reoviridae7 <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is a project helper; judging by the output below it prints the
# ORF table for these contigs and saves a figure. Its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae7"
)
##                   orf_name       seqid    source type start  end score strand
## 340   contig_4942_3_2741_- contig_4942 getorf_JV gene     3 2741     .  FALSE
## 341   contig_4957_2_2599_- contig_4957 getorf_JV gene     2 2599     .  FALSE
## 342   contig_4957_2_2626_- contig_4957 getorf_JV gene     2 2626     .  FALSE
## 420 contig_8318_119_1861_- contig_8318 getorf_JV gene   119 1861     .  FALSE
## 423  contig_8787_83_1774_- contig_8787 getorf_JV gene    83 1774     .  FALSE
## 425   contig_8808_1_1716_- contig_8808 getorf_JV gene     1 1716     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 340     1       2739       2779 AKH40310.1    0.332              904        597
## 341     1       2598       2772       <NA>       NA               NA         NA
## 342     2       2625       2772       <NA>       NA               NA         NA
## 420     1       1743       1883 AKH40314.1    0.285              239        167
## 423     1       1692       1813 AKH40315.1    0.290              508        335
## 425     1       1716       1812 AKH40312.1    0.267              215        152
##     gap_opens qstart qend sstart send     evalue bitscore
## 340         0     18  912     98 1001 1.698e-140      475
## 341        NA     NA   NA     NA   NA         NA       NA
## 342        NA     NA   NA     NA   NA         NA       NA
## 420         0    337  570    432  670  1.828e-16       89
## 423         0     12  519     29  501  9.100e-48      187
## 425         0     65  273     63  277  4.012e-08       62
##                                 annotation
## 340 putative polymerase [Bloomfield virus]
## 341                                   <NA>
## 342                                   <NA>
## 420                ORF1 [Bloomfield virus]
## 423                ORF1 [Bloomfield virus]
## 425                ORF1 [Bloomfield virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

contig 4957 => putative major core protein [Bloomfield virus], Sequence ID: AKH40311.1, Length: 1228. Full length contig 20236, 7428, 3260, 18191 => no hit

res[2]
## [[1]]
##                   orf_name       seqid    source type start  end score strand
## 340   contig_4942_3_2741_- contig_4942 getorf_JV gene     3 2741     .  FALSE
## 341   contig_4957_2_2599_- contig_4957 getorf_JV gene     2 2599     .  FALSE
## 342   contig_4957_2_2626_- contig_4957 getorf_JV gene     2 2626     .  FALSE
## 420 contig_8318_119_1861_- contig_8318 getorf_JV gene   119 1861     .  FALSE
## 423  contig_8787_83_1774_- contig_8787 getorf_JV gene    83 1774     .  FALSE
## 425   contig_8808_1_1716_- contig_8808 getorf_JV gene     1 1716     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 340     1       2739       2779 AKH40310.1    0.332              904        597
## 341     1       2598       2772       <NA>       NA               NA         NA
## 342     2       2625       2772       <NA>       NA               NA         NA
## 420     1       1743       1883 AKH40314.1    0.285              239        167
## 423     1       1692       1813 AKH40315.1    0.290              508        335
## 425     1       1716       1812 AKH40312.1    0.267              215        152
##     gap_opens qstart qend sstart send     evalue bitscore
## 340         0     18  912     98 1001 1.698e-140      475
## 341        NA     NA   NA     NA   NA         NA       NA
## 342        NA     NA   NA     NA   NA         NA       NA
## 420         0    337  570    432  670  1.828e-16       89
## 423         0     12  519     29  501  9.100e-48      187
## 425         0     65  273     63  277  4.012e-08       62
##                                 annotation
## 340 putative polymerase [Bloomfield virus]
## 341                                   <NA>
## 342                                   <NA>
## 420                ORF1 [Bloomfield virus]
## 423                ORF1 [Bloomfield virus]
## 425                ORF1 [Bloomfield virus]
# # some of the contigs do have ORFs but incomplete (lacking either start, stop or both). To get them, run the script ../../scripts_annotation/orf_prediction.R with option 0 for getorf to predict them. 
# gff_unassigned_option0=read.table("../sequences/RNA_virus_genomes/Bloomfield-like_Lepto_Dkun_Pachy_unassigned.gff", header=FALSE)
# names(gff_unassigned_option0)=c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "seq_length")
# # use phase column to indicate that the orf has been predicted with getorf option 0 (instead of 1)
# gff_unassigned_option0$phase="*"
# orf_names=paste(gff_unassigned_option0$seqid, gff_unassigned_option0$start, gff_unassigned_option0$end, gff_unassigned_option0$strand, sep="_")
# orf_names=sub(pattern = "TRUE", "+", x = orf_names)
# orf_names=sub(pattern = "FALSE", "-", x = orf_names)
# gff_unassigned_option0$orf_name=orf_names
# 
# # join the tables
# tab=full_join(tab,gff_unassigned_option0)
# tab
#tab$phase=as.factor(x = tab$phase)
#levels(tab$phase)=c(2,1)

# add annotation for the contig_4957 ORF, which has no annotation in the table above
#tab[tab$orf_name=="contig_4957_2_2626_-",]$annotation="putative major core protein [Bloomfield virus]"

We built a phylogeny based on putative RdRp:

# Tree for the putative polymerase of contig_4942 (PhyML output), drawn with
# the project helper plot_phylogeny(); the x axis is then reset to 0-14.
p <- plot_phylogeny(
  "../phylogenies/contig_4942_3_2741_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 14)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report.
ggsave("../phylogenies/contig_4942_3_2741.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Cimodo virus-like in L. heterotoma L. boulardi, Trichopria, D. kuntzei, Pachy and Asobara sp.

Define the corresponding contigs :

# Contigs attributed to the Cimodo-like virus.
contig_set <- paste0("contig_", c(5007, 6823, 8100))
# No unassigned contigs are attached here; the candidate list is kept
# commented out below.
contig_set_unassigned <- NA
#contig_set_unassigned=paste0("contig_", c(16163, 10916, 19572, 7503, 12982, 7433))
# store for later fusion of corresponding lines
virus_list$Reoviridae6 <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is a project helper; judging by the output below it prints the
# ORF table for these contigs and saves a figure. Its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae6"
)
##                  orf_name       seqid    source type start  end score strand
## 344 contig_5007_80_2752_+ contig_5007 getorf_JV gene    80 2752     .   TRUE
## 372 contig_6823_34_2163_+ contig_6823 getorf_JV gene    34 2163     .   TRUE
## 407 contig_8100_17_1819_- contig_8100 getorf_JV gene    17 1819     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 344     1       2673       2752 YP_009059074.1    0.218              835
## 372     1       2130       2163 YP_009072449.1    0.275              515
## 407     1       1803       1917 YP_009072449.1    0.250              539
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 344        629         0     13  817    119  953 1.102e-11       74
## 372        364         0    127  641    126  628 5.960e-41      169
## 407        398         0     20  550    750 1288 8.838e-37      154
##                                      annotation
## 344         hypothetical protein [Cimodo virus]
## 372 RNA-dependent RNA polymerase [Cimodo virus]
## 407 RNA-dependent RNA polymerase [Cimodo virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                  orf_name       seqid    source type start  end score strand
## 344 contig_5007_80_2752_+ contig_5007 getorf_JV gene    80 2752     .   TRUE
## 372 contig_6823_34_2163_+ contig_6823 getorf_JV gene    34 2163     .   TRUE
## 407 contig_8100_17_1819_- contig_8100 getorf_JV gene    17 1819     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 344     1       2673       2752 YP_009059074.1    0.218              835
## 372     1       2130       2163 YP_009072449.1    0.275              515
## 407     1       1803       1917 YP_009072449.1    0.250              539
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 344        629         0     13  817    119  953 1.102e-11       74
## 372        364         0    127  641    126  628 5.960e-41      169
## 407        398         0     20  550    750 1288 8.838e-37      154
##                                      annotation
## 344         hypothetical protein [Cimodo virus]
## 372 RNA-dependent RNA polymerase [Cimodo virus]
## 407 RNA-dependent RNA polymerase [Cimodo virus]

Contig_7503 has a very weak hit (10e-5) with a cimodo-like virus: MAG: hypothetical protein [Diaphorina citri cimodo-like virus], Sequence ID: QXG83186.1, Length: 700. The other unassigned contigs have no hit.

The RdRp protein is split in two contigs (contig 6823 for the first part and 8100 for the C-terminal).

We built a phylogeny based on the fusion of both parts of the RdRp:

# RdRp tree stored under the contig_8100 ORF name (PhyML output), drawn with
# the project helper plot_phylogeny(); the x axis is then reset to 0-8.
p <- plot_phylogeny(
  "../phylogenies/contig_8100_17_1819_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 8)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report.
ggsave("../phylogenies/contig_8100_17_1819.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Wuhan insect virus 15-like in L. heterotoma (3 of 5 samples positive)

Define the corresponding contigs :

# Single contig assigned to this virus; NA means no unassigned candidates.
contig_set <- paste0("contig_", 2030)
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$Qinviridae_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is a project helper; judging by the output below it prints the
# ORF table for these contigs and saves a figure. Its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Qinviridae_L.h"
)
##                  orf_name       seqid    source type start  end score strand
## 257 contig_2030_53_5533_+ contig_2030 getorf_JV gene    53 5533     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 257     1       5481       5586 YP_009342465.1    0.382             1645
##     mismatches gap_opens qstart qend sstart send evalue bitscore
## 257       1006         0      1 1645      1 1629      0     1096
##                                               annotation
## 257 RNA-dependent RNA polymerase [Wuhan insect virus 15]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                  orf_name       seqid    source type start  end score strand
## 257 contig_2030_53_5533_+ contig_2030 getorf_JV gene    53 5533     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 257     1       5481       5586 YP_009342465.1    0.382             1645
##     mismatches gap_opens qstart qend sstart send evalue bitscore
## 257       1006         0      1 1645      1 1629      0     1096
##                                               annotation
## 257 RNA-dependent RNA polymerase [Wuhan insect virus 15]

A single contig is found, whereas a bisegmented genome is expected (1,601 bp => hypothetical protein; 5,889 bp => RdRp). It contains a full-length RdRp protein.

https://www.genome.jp/virushostdb/1923719

We built a phylogeny based on the full-length RdRp:

# Tree for the RdRp of contig_2030 (PhyML output), drawn with the project
# helper plot_phylogeny(); the x axis is then reset to 0-14.
p <- plot_phylogeny(
  "../phylogenies/contig_2030_53_5533_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 14)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the tree to PDF, then display it in the report.
ggsave("../phylogenies/contig_2030.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Formica exsecta virus-like in L. heterotoma.

Define the corresponding contigs :

# Contigs assigned to this virus; NA means no unassigned candidate contigs.
contig_set <- paste0("contig_", c(18281, 17904))
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$Iflaviridae_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is a project helper; judging by the output below it prints the
# ORF table for these contigs and saves a figure. Its result is kept in `res`.
# NOTE(review): `name` is "Formica_Lh" here, not the virus_list key
# "Iflaviridae_L.h" used above -- confirm this is intended.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Formica_Lh"
)
##                   orf_name        seqid    source type start  end score strand
## 215  contig_17904_3_1124_- contig_17904 getorf_JV gene     3 1124     .  FALSE
## 221 contig_18281_88_1095_- contig_18281 getorf_JV gene    88 1095     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 215     1       1122       1142 YP_008888537.1    0.378              307
## 221     1       1008       1127 YP_008888537.1    0.411              243
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 215        190         0      2  308   1445 1751 1.674e-58      213
## 221        142         0     93  335    864 1105 3.409e-54      199
##                                annotation
## 215 polyprotein [Formica exsecta virus 2]
## 221 polyprotein [Formica exsecta virus 2]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                   orf_name        seqid    source type start  end score strand
## 215  contig_17904_3_1124_- contig_17904 getorf_JV gene     3 1124     .  FALSE
## 221 contig_18281_88_1095_- contig_18281 getorf_JV gene    88 1095     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 215     1       1122       1142 YP_008888537.1    0.378              307
## 221     1       1008       1127 YP_008888537.1    0.411              243
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 215        190         0      2  308   1445 1751 1.674e-58      213
## 221        142         0     93  335    864 1105 3.409e-54      199
##                                annotation
## 215 polyprotein [Formica exsecta virus 2]
## 221 polyprotein [Formica exsecta virus 2]

Formica exsecta virus has a 9160bp genome encoding a polyprotein.

https://www.genome.jp/dbget-bin/www_bget?refseq:NC_023022

The genome here is thus incomplete for sure. Dhaygude et al. PeerJ 2019

Domains for contig_17904

Domains for contig_18281

In the absence of RdRp in our sequences, we built a phylogeny based on the capsid domain :

# Tree for the contig_18281 polyprotein ORF (PhyML output), drawn with the
# project helper plot_phylogeny(). The x-axis override is left disabled.
p <- plot_phylogeny(
  "../phylogenies/contig_18281_88_1095_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
#p = p + xlim(0,8)
# Write the tree to an 8 x 10 PDF, then display it in the report.
ggsave(
  "../phylogenies/contig_18281_88_1095.pdf",
  plot = p,
  width = 8,
  height = 10
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Kwi virus-like in L. heterotoma

Define the corresponding contigs :

# Contigs attributed to the Kwi-like virus.
contig_set <- paste0(
  "contig_",
  c(8520, 13838, 19523, 9814, 9238, 6934, 9023, 10017)
)
# Contigs listed separately as unassigned.
contig_set_unassigned <- paste0("contig_", c(13351, 9049, 8831))

# Record both sets under this virus name so the corresponding lines can be
# fused later.
virus_list[["Quenyavirus_L.h"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on both contig sets; its return value is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Quenyavirus_L.h"
)
##                    orf_name        seqid    source type start  end score strand
## 1   contig_10017_205_1635_- contig_10017 getorf_JV gene   205 1635     .  FALSE
## 109    contig_13351_1_159_- contig_13351 getorf_JV gene     1  159     .  FALSE
## 110    contig_13351_1_180_- contig_13351 getorf_JV gene     1  180     .  FALSE
## 111 contig_13351_306_1307_- contig_13351 getorf_JV gene   306 1307     .  FALSE
## 112 contig_13351_306_1370_- contig_13351 getorf_JV gene   306 1370     .  FALSE
## 121 contig_13838_287_1228_- contig_13838 getorf_JV gene   287 1228     .  FALSE
## 231   contig_19523_74_730_+ contig_19523 getorf_JV gene    74  730     .   TRUE
## 232 contig_19523_857_1084_+ contig_19523 getorf_JV gene   857 1084     .   TRUE
## 375   contig_6934_21_2027_+  contig_6934 getorf_JV gene    21 2027     .   TRUE
## 421   contig_8520_33_1451_+  contig_8520 getorf_JV gene    33 1451     .   TRUE
## 426   contig_8831_14_1726_+  contig_8831 getorf_JV gene    14 1726     .   TRUE
## 427    contig_8831_2_1726_+  contig_8831 getorf_JV gene     2 1726     .   TRUE
## 429  contig_9023_366_1742_-  contig_9023 getorf_JV gene   366 1742     .  FALSE
## 432    contig_9049_2_1744_+  contig_9049 getorf_JV gene     2 1744     .   TRUE
## 433   contig_9049_20_1744_+  contig_9049 getorf_JV gene    20 1744     .   TRUE
## 442   contig_9238_15_1424_+  contig_9238 getorf_JV gene    15 1424     .   TRUE
## 443 contig_9238_1569_1757_+  contig_9238 getorf_JV gene  1569 1757     .   TRUE
## 469  contig_9814_148_1650_-  contig_9814 getorf_JV gene   148 1650     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 1       1       1431       1662 AVB77242.1    0.665              476        158
## 109     1        159       1372       <NA>       NA               NA         NA
## 110     2        180       1372       <NA>       NA               NA         NA
## 111     1       1002       1372       <NA>       NA               NA         NA
## 112     2       1065       1372       <NA>       NA               NA         NA
## 121     1        942       1339       <NA>       NA               NA         NA
## 231     1        657       1084 AVB77242.1    0.339              213        139
## 232     1        228       1084       <NA>       NA               NA         NA
## 375     1       2007       2138 AYU49214.1    0.744              670        171
## 421     1       1419       1850 AVB77240.1    0.346              312        193
## 426     1       1713       1809       <NA>       NA               NA         NA
## 427     2       1725       1809       <NA>       NA               NA         NA
## 429     1       1377       1784 AVB77241.1    0.512              453        216
## 432     2       1743       1781       <NA>       NA               NA         NA
## 433     1       1725       1781       <NA>       NA               NA         NA
## 442     1       1410       1758 AVB77240.1    0.559              450        193
## 443     1        189       1758       <NA>       NA               NA         NA
## 469     1       1503       1686 AVB77239.1    0.667              502        167
##     gap_opens qstart qend sstart send     evalue bitscore
## 1           0      1  476      1  474 9.200e-212      662
## 109        NA     NA   NA     NA   NA         NA       NA
## 110        NA     NA   NA     NA   NA         NA       NA
## 111        NA     NA   NA     NA   NA         NA       NA
## 112        NA     NA   NA     NA   NA         NA       NA
## 121        NA     NA   NA     NA   NA         NA       NA
## 231         0      2  212    259  471  2.999e-27      115
## 232        NA     NA   NA     NA   NA         NA       NA
## 375         0      1  669      1  670  0.000e+00     1050
## 421         0     41  336      4  315  2.908e-49      189
## 426        NA     NA   NA     NA   NA         NA       NA
## 427        NA     NA   NA     NA   NA         NA       NA
## 429         0      7  459      5  448 5.129e-132      431
## 432        NA     NA   NA     NA   NA         NA       NA
## 433        NA     NA   NA     NA   NA         NA       NA
## 442         0      4  453      1  439 5.578e-158      507
## 443        NA     NA   NA     NA   NA         NA       NA
## 469         0      1  501      1  502 2.685e-233      726
##                              annotation
## 1      hypothetical protein [Kwi virus]
## 109                                <NA>
## 110                                <NA>
## 111                                <NA>
## 112                                <NA>
## 121                                <NA>
## 231    hypothetical protein [Kwi virus]
## 232                                <NA>
## 375 putative RNA polymerase [Kwi virus]
## 421    hypothetical protein [Kwi virus]
## 426                                <NA>
## 427                                <NA>
## 429    hypothetical protein [Kwi virus]
## 432                                <NA>
## 433                                <NA>
## 442    hypothetical protein [Kwi virus]
## 443                                <NA>
## 469    hypothetical protein [Kwi virus]
## Saving 7 x 5 in image
# First element of the plot_orfs() result. Only the list header is echoed as
# text (presumably this element is the ORF plot; confirm in plot_orfs).
res[1]
## [[1]]

# Second element of the plot_orfs() result: the ORF table printed below.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 1   contig_10017_205_1635_- contig_10017 getorf_JV gene   205 1635     .  FALSE
## 109    contig_13351_1_159_- contig_13351 getorf_JV gene     1  159     .  FALSE
## 110    contig_13351_1_180_- contig_13351 getorf_JV gene     1  180     .  FALSE
## 111 contig_13351_306_1307_- contig_13351 getorf_JV gene   306 1307     .  FALSE
## 112 contig_13351_306_1370_- contig_13351 getorf_JV gene   306 1370     .  FALSE
## 121 contig_13838_287_1228_- contig_13838 getorf_JV gene   287 1228     .  FALSE
## 231   contig_19523_74_730_+ contig_19523 getorf_JV gene    74  730     .   TRUE
## 232 contig_19523_857_1084_+ contig_19523 getorf_JV gene   857 1084     .   TRUE
## 375   contig_6934_21_2027_+  contig_6934 getorf_JV gene    21 2027     .   TRUE
## 421   contig_8520_33_1451_+  contig_8520 getorf_JV gene    33 1451     .   TRUE
## 426   contig_8831_14_1726_+  contig_8831 getorf_JV gene    14 1726     .   TRUE
## 427    contig_8831_2_1726_+  contig_8831 getorf_JV gene     2 1726     .   TRUE
## 429  contig_9023_366_1742_-  contig_9023 getorf_JV gene   366 1742     .  FALSE
## 432    contig_9049_2_1744_+  contig_9049 getorf_JV gene     2 1744     .   TRUE
## 433   contig_9049_20_1744_+  contig_9049 getorf_JV gene    20 1744     .   TRUE
## 442   contig_9238_15_1424_+  contig_9238 getorf_JV gene    15 1424     .   TRUE
## 443 contig_9238_1569_1757_+  contig_9238 getorf_JV gene  1569 1757     .   TRUE
## 469  contig_9814_148_1650_-  contig_9814 getorf_JV gene   148 1650     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 1       1       1431       1662 AVB77242.1    0.665              476        158
## 109     1        159       1372       <NA>       NA               NA         NA
## 110     2        180       1372       <NA>       NA               NA         NA
## 111     1       1002       1372       <NA>       NA               NA         NA
## 112     2       1065       1372       <NA>       NA               NA         NA
## 121     1        942       1339       <NA>       NA               NA         NA
## 231     1        657       1084 AVB77242.1    0.339              213        139
## 232     1        228       1084       <NA>       NA               NA         NA
## 375     1       2007       2138 AYU49214.1    0.744              670        171
## 421     1       1419       1850 AVB77240.1    0.346              312        193
## 426     1       1713       1809       <NA>       NA               NA         NA
## 427     2       1725       1809       <NA>       NA               NA         NA
## 429     1       1377       1784 AVB77241.1    0.512              453        216
## 432     2       1743       1781       <NA>       NA               NA         NA
## 433     1       1725       1781       <NA>       NA               NA         NA
## 442     1       1410       1758 AVB77240.1    0.559              450        193
## 443     1        189       1758       <NA>       NA               NA         NA
## 469     1       1503       1686 AVB77239.1    0.667              502        167
##     gap_opens qstart qend sstart send     evalue bitscore
## 1           0      1  476      1  474 9.200e-212      662
## 109        NA     NA   NA     NA   NA         NA       NA
## 110        NA     NA   NA     NA   NA         NA       NA
## 111        NA     NA   NA     NA   NA         NA       NA
## 112        NA     NA   NA     NA   NA         NA       NA
## 121        NA     NA   NA     NA   NA         NA       NA
## 231         0      2  212    259  471  2.999e-27      115
## 232        NA     NA   NA     NA   NA         NA       NA
## 375         0      1  669      1  670  0.000e+00     1050
## 421         0     41  336      4  315  2.908e-49      189
## 426        NA     NA   NA     NA   NA         NA       NA
## 427        NA     NA   NA     NA   NA         NA       NA
## 429         0      7  459      5  448 5.129e-132      431
## 432        NA     NA   NA     NA   NA         NA       NA
## 433        NA     NA   NA     NA   NA         NA       NA
## 442         0      4  453      1  439 5.578e-158      507
## 443        NA     NA   NA     NA   NA         NA       NA
## 469         0      1  501      1  502 2.685e-233      726
##                              annotation
## 1      hypothetical protein [Kwi virus]
## 109                                <NA>
## 110                                <NA>
## 111                                <NA>
## 112                                <NA>
## 121                                <NA>
## 231    hypothetical protein [Kwi virus]
## 232                                <NA>
## 375 putative RNA polymerase [Kwi virus]
## 421    hypothetical protein [Kwi virus]
## 426                                <NA>
## 427                                <NA>
## 429    hypothetical protein [Kwi virus]
## 432                                <NA>
## 433                                <NA>
## 442    hypothetical protein [Kwi virus]
## 443                                <NA>
## 469    hypothetical protein [Kwi virus]

The 3 unannotated contigs show no blastx hits against nr. However, each of them contains a long ORF (predicted with getorf option 0).

Kwi virus was described in Obbard et al. 2020 (see their fig. 1) from the “dark matter” of Webster et al. 2015. It is composed (for now) of 5 segments of approximately 2 kb each, and probably represents a new virus family.

Let’s check the blastx result from the original contigs :

# Read the semicolon-separated blastx table; its first line holds the column
# names. `header = TRUE` is spelled out instead of `h=T`, which relied on
# partial argument matching and on the reassignable `T` alias.
blastx <- read.table("../TABLES/all_taxonomy.blastx", sep = ";", header = TRUE)
# Keep only the hits whose query is one of the contigs in `contig_set`.
blastx_subset <- blastx[blastx$query %in% contig_set, ]

contig_9814 => seg 1 +++; contig_8520 => seg 2; contig_9238 => seg 2; contig_9023 => seg 3; contig_13838 => seg 3; contig_10017 => seg 4; contig_19523 => seg 4; contig_6934 => seg 5 +++

contig_8520 and contig_9238 appear to be homologous; contig_9023 and contig_13838 appear to be homologous; contig_10017 and contig_19523 appear to be homologous.

It looks like two variants are present. Both are present in Lh Ige June 2012, whereas only one is present at the same location in 2011; the other variant is the only one we found in Lh Goth 2012…

It is likely that the three additional contigs are also part of the genome… We need to check co-occurrence on a wider set of samples.

Phylogeny on RdRp (with homologs from nr - blastx) :

# Tree built from the Kwi RdRp homologs, drawn by plot_phylogeny().
p <- plot_phylogeny(
  "../phylogenies/Kwi_RdRp_homogs_nr-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Set the x-axis limits to 0-10; this replaces the x scale already on the plot.
p <- p + xlim(0, 10)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Export to PDF; no width/height is given here.
ggsave("../phylogenies/Kwi_RdRp_homogs_nr.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Display the tree.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Hermitage virus in L. heterotoma

Define the corresponding contigs :

# Single contig attributed to the Hermitage-like virus.
contig_set <- paste0("contig_", 8242)
# No unassigned contigs for this virus.
contig_set_unassigned <- NA

# Record both sets under this virus name so the corresponding lines can be
# fused later.
virus_list[["Hermitage_L.h"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on the contig set; its return value is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Hermitage_L.h"
)
##                   orf_name       seqid    source type start  end score strand
## 418 contig_8242_167_1894_+ contig_8242 getorf_JV gene   167 1894     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 418     1       1728       1894 AMO03217.1    0.927              576         42
##     gap_opens qstart qend sstart send evalue bitscore
## 418         0      1  576      8  583      0     1079
##                                          annotation
## 418 putative polyprotein, partial [Hermitage virus]
## Saving 7 x 5 in image
# First element of the plot_orfs() result. Only the list header is echoed as
# text (presumably this element is the ORF plot; confirm in plot_orfs).
res[1]
## [[1]]

# Second element of the plot_orfs() result: the ORF table printed below.
res[2]
## [[1]]
##                   orf_name       seqid    source type start  end score strand
## 418 contig_8242_167_1894_+ contig_8242 getorf_JV gene   167 1894     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 418     1       1728       1894 AMO03217.1    0.927              576         42
##     gap_opens qstart qend sstart send evalue bitscore
## 418         0      1  576      8  583      0     1079
##                                          annotation
## 418 putative polyprotein, partial [Hermitage virus]

No phylogeny was built since a single sequence showed homology (Hermitage virus).

Described in Webster et al. 2016: “RNA. Related to Gentian Kobu-sho-associated virus (reported to be dsRNA; ref. 74) and a virus-like transcript from Conwentzia psociformis. Distantly related to soybean cyst nematode virus 5 and the flavivirus-like Xinzhou spider virus 2. [Two un-joined contigs of 3.2 kbp and 3.5 kbp encoding a putative polyprotein]”

Ganda orthophasmavirus-like in L. heterotoma

Define the corresponding contigs :

# Single contig attributed to the Ganda orthophasmavirus-like virus.
contig_set <- paste0("contig_", 21211)
# No unassigned contigs for this virus.
contig_set_unassigned <- NA

# Record both sets under this virus name so the corresponding lines can be
# fused later.
virus_list[["Phasmaviridae_L.h"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on the contig set; its return value is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Phasmaviridae_L.h"
)
##                  orf_name        seqid    source type start end score strand
## 268 contig_21211_98_649_- contig_21211 getorf_JV gene    98 649     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 268     1        552       1032 YP_009666983.1    0.431              176
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 268         98         0      1  176    137  310 3.072e-39      148
##                          annotation
## 268 nucleoprotein [Ganda bee virus]
## Saving 7 x 5 in image
# First element of the plot_orfs() result. Only the list header is echoed as
# text (presumably this element is the ORF plot; confirm in plot_orfs).
res[1]
## [[1]]

# Second element of the plot_orfs() result: the ORF table printed below.
res[2]
## [[1]]
##                  orf_name        seqid    source type start end score strand
## 268 contig_21211_98_649_- contig_21211 getorf_JV gene    98 649     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 268     1        552       1032 YP_009666983.1    0.431              176
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 268         98         0      1  176    137  310 3.072e-39      148
##                          annotation
## 268 nucleoprotein [Ganda bee virus]

We built a phylogeny based on the only annotated protein (the nucleoprotein):

# Tree for the contig_21211 ORF and its homologs, drawn by plot_phylogeny().
p <- plot_phylogeny(
  "../phylogenies/contig_21211_98_649_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Set the x-axis limits to 0-4; this replaces the x scale already on the plot.
p <- p + xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Export to PDF; only the width is set explicitly.
ggsave("../phylogenies/contig_21211_98_649.pdf", plot = p, width = 8)
## Saving 8 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Display the tree.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Surprisingly, we found Drosophila sequences! This needs to be checked. Orthophasmaviruses are segmented viruses (3 segments of 2.2 kb, 6.7 kb and 2.8 kb).

See files Orthophasmivirus_Drosophila*

HGT in Drosophila?

With nr homologs (NCBI queried on 22 March 2023)

# Tree including the nr homologs, drawn by plot_phylogeny().
p <- plot_phylogeny(
  "../phylogenies/Ganda_nr_short-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Set the x-axis limits to 0-7; this replaces the x scale already on the plot.
p <- p + xlim(0, 7)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Export the figure to PDF with an explicit width and height.
ggsave(
  "../phylogenies/Ganda_nr_short-PhyML_tree.pdf",
  plot = p,
  width = 12,
  height = 11
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Display the tree.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Hubei dimarhabdovirus 2-like in L. heterotoma

Define the corresponding contigs :

# Single contig attributed to the Hubei dimarhabdovirus 2-like virus.
contig_set <- paste0("contig_", 21655)
# No unassigned contigs for this virus.
contig_set_unassigned <- NA

# Record both sets under this virus name so the corresponding lines can be
# fused later.
virus_list[["Rhabdoviridae2"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on the contig set; its return value is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Rhabdoviridae2"
)
##                    orf_name        seqid    source type start  end score strand
## 278   contig_21655_43_606_+ contig_21655 getorf_JV gene    43  606     .   TRUE
## 279 contig_21655_652_1020_+ contig_21655 getorf_JV gene   652 1020     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 278     1        564       1021 YP_009337067.1    0.439              188
## 279     1        369       1021           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 278         98         0      1  188    246  420 1.026e-39      150
## 279         NA        NA     NA   NA     NA   NA        NA       NA
##                                                 annotation
## 278 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 279                                                   <NA>
## Saving 7 x 5 in image
# First element of the plot_orfs() result. Only the list header is echoed as
# text (presumably this element is the ORF plot; confirm in plot_orfs).
res[1]
## [[1]]

# Second element of the plot_orfs() result: the ORF table printed below.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 278   contig_21655_43_606_+ contig_21655 getorf_JV gene    43  606     .   TRUE
## 279 contig_21655_652_1020_+ contig_21655 getorf_JV gene   652 1020     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 278     1        564       1021 YP_009337067.1    0.439              188
## 279     1        369       1021           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 278         98         0      1  188    246  420 1.026e-39      150
## 279         NA        NA     NA   NA     NA   NA        NA       NA
##                                                 annotation
## 278 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 279                                                   <NA>

We built a phylogeny based on the only annotated protein (the nucleoprotein):

# Tree for the contig_21655 ORF and its homologs, drawn by plot_phylogeny().
p <- plot_phylogeny(
  "../phylogenies/contig_21655_43_606_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Set the x-axis limits to 0-7; this replaces the x scale already on the plot.
p <- p + xlim(0, 7)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Export to PDF; no width/height is given here.
ggsave("../phylogenies/contig_21655_43_606_+.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Display the tree.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Takaungu virus-like in L. heterotoma

See Webster et al (2016) Evolutionary Bioinformatics. Detected in D. melanogaster.

RNA genome. Related to Gentian Kobu-sho-associated virus (reported to be dsRNA; ref. 74) and a virus-like transcript from Conwentzia psociformis. Distantly related to soybean cyst nematode virus 5 and the flavivirus-like Xinzhou spider virus 2 (ref. 75). Derived from pools E and K of Webster et al. (ref. 31), this virus incorporates flavivirus-like sequence KP757925 that was previously reported there. [Two un-joined contigs of 2.3 kbp and 3.9 kbp encoding a putative polyprotein]

Define the corresponding contigs :

# Single contig attributed to the Takaungu-like virus.
contig_set <- paste0("contig_", 11017)
# No unassigned contigs for this virus.
contig_set_unassigned <- NA

# Record both sets under this virus name so the corresponding lines can be
# fused later.
virus_list[["Flaviviridae1_L.h"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on the contig set; its return value is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Flaviviridae1_L.h"
)
##                 orf_name        seqid    source type start  end score strand
## 30 contig_11017_3_1493_- contig_11017 getorf_JV gene     3 1493     .  FALSE
##    phase attributes seq_length subject_id identity alignment_length mismatches
## 30     1       1491       1557 AMO03219.1    0.316              173        115
##    gap_opens qstart qend sstart send   evalue bitscore
## 30         0    327  495     27  199 8.94e-16       86
##                                        annotation
## 30 putative polyprotein, partial [Takaungu virus]
## Saving 7 x 5 in image
# First element of the plot_orfs() result. Only the list header is echoed as
# text (presumably this element is the ORF plot; confirm in plot_orfs).
res[1]
## [[1]]

# Second element of the plot_orfs() result: the ORF table printed below.
res[2]
## [[1]]
##                 orf_name        seqid    source type start  end score strand
## 30 contig_11017_3_1493_- contig_11017 getorf_JV gene     3 1493     .  FALSE
##    phase attributes seq_length subject_id identity alignment_length mismatches
## 30     1       1491       1557 AMO03219.1    0.316              173        115
##    gap_opens qstart qend sstart send   evalue bitscore
## 30         0    327  495     27  199 8.94e-16       86
##                                        annotation
## 30 putative polyprotein, partial [Takaungu virus]

No phylogeny was built since only one homologous sequence was identified.

Rice gall dwarf virus / Homalodisca vitripennis reovirus-like virus in Trichopria sp.

Define the corresponding contigs :

# Contigs attributed to the reovirus-like virus found in Trichopria sp.
contig_set <- paste0(
  "contig_",
  c(12726, 18151, 8245, 7089, 6361, 13248, 6912, 16886, 14483)
)
# Contigs listed separately as unassigned.
contig_set_unassigned <- paste0(
  "contig_",
  c(
    21782, 17382, 12974, 11366, 20468, 15565, 10231,
    16123, 6070, 4706, 9848, 11299, 18944
  )
)

# Record both sets under this virus name so the corresponding lines can be
# fused later.
virus_list[["Reoviridae5_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on both contig sets; its return value is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae5_Tricho"
)
##                    orf_name        seqid    source type start  end score strand
## 16    contig_10231_1_1638_+ contig_10231 getorf_JV gene     1 1638     .   TRUE
## 17  contig_10231_205_1638_+ contig_10231 getorf_JV gene   205 1638     .   TRUE
## 42    contig_11299_2_1444_+ contig_11299 getorf_JV gene     2 1444     .   TRUE
## 43    contig_11299_8_1444_+ contig_11299 getorf_JV gene     8 1444     .   TRUE
## 44     contig_11366_1_159_- contig_11366 getorf_JV gene     1  159     .  FALSE
## 45  contig_11366_172_1461_- contig_11366 getorf_JV gene   172 1461     .  FALSE
## 46  contig_11366_172_1485_- contig_11366 getorf_JV gene   172 1485     .  FALSE
## 85    contig_12726_9_1265_+ contig_12726 getorf_JV gene     9 1265     .   TRUE
## 88   contig_12974_68_1387_- contig_12974 getorf_JV gene    68 1387     .  FALSE
## 89   contig_12974_68_1396_- contig_12974 getorf_JV gene    68 1396     .  FALSE
## 108 contig_13248_140_1234_+ contig_13248 getorf_JV gene   140 1234     .   TRUE
## 131   contig_14483_3_1301_+ contig_14483 getorf_JV gene     3 1301     .   TRUE
## 132  contig_14483_39_1301_+ contig_14483 getorf_JV gene    39 1301     .   TRUE
## 144   contig_15565_1_1245_+ contig_15565 getorf_JV gene     1 1245     .   TRUE
## 145  contig_15565_34_1245_+ contig_15565 getorf_JV gene    34 1245     .   TRUE
## 157   contig_16123_3_1100_- contig_16123 getorf_JV gene     3 1100     .  FALSE
## 158   contig_16123_3_1124_- contig_16123 getorf_JV gene     3 1124     .  FALSE
## 173  contig_16886_14_1174_- contig_16886 getorf_JV gene    14 1174     .  FALSE
## 194  contig_17382_160_897_+ contig_17382 getorf_JV gene   160  897     .   TRUE
## 195   contig_17382_97_897_+ contig_17382 getorf_JV gene    97  897     .   TRUE
## 218   contig_18151_3_1112_- contig_18151 getorf_JV gene     3 1112     .  FALSE
## 228  contig_18944_27_1100_+ contig_18944 getorf_JV gene    27 1100     .   TRUE
## 229   contig_18944_3_1100_+ contig_18944 getorf_JV gene     3 1100     .   TRUE
## 260  contig_20468_49_1020_- contig_20468 getorf_JV gene    49 1020     .  FALSE
## 261   contig_20468_49_918_- contig_20468 getorf_JV gene    49  918     .  FALSE
## 283 contig_21782_254_1018_- contig_21782 getorf_JV gene   254 1018     .  FALSE
## 284  contig_21782_254_991_- contig_21782 getorf_JV gene   254  991     .  FALSE
## 335   contig_4706_33_2876_+  contig_4706 getorf_JV gene    33 2876     .   TRUE
## 336   contig_4706_66_2876_+  contig_4706 getorf_JV gene    66 2876     .   TRUE
## 360  contig_6070_558_2207_-  contig_6070 getorf_JV gene   558 2207     .  FALSE
## 361  contig_6070_558_2231_-  contig_6070 getorf_JV gene   558 2231     .  FALSE
## 367   contig_6361_19_2178_+  contig_6361 getorf_JV gene    19 2178     .   TRUE
## 373   contig_6912_73_2133_-  contig_6912 getorf_JV gene    73 2133     .  FALSE
## 378    contig_7089_3_2078_-  contig_7089 getorf_JV gene     3 2078     .  FALSE
## 419   contig_8245_37_1761_-  contig_8245 getorf_JV gene    37 1761     .  FALSE
## 470    contig_9848_1_1524_+  contig_9848 getorf_JV gene     1 1524     .   TRUE
## 471   contig_9848_25_1524_+  contig_9848 getorf_JV gene    25 1524     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 16      2       1638       1638           <NA>       NA               NA
## 17      1       1434       1638           <NA>       NA               NA
## 42      2       1443       1532           <NA>       NA               NA
## 43      1       1437       1532           <NA>       NA               NA
## 44      2        159       1526           <NA>       NA               NA
## 45      1       1290       1526           <NA>       NA               NA
## 46      2       1314       1526           <NA>       NA               NA
## 85      1       1257       1266 YP_001111373.1    0.325              384
## 88      1       1320       1398           <NA>       NA               NA
## 89      2       1329       1398           <NA>       NA               NA
## 108     1       1095       1235 YP_009508276.1    0.350              347
## 131     2       1299       1302           <NA>       NA               NA
## 132     1       1263       1302           <NA>       NA               NA
## 144     2       1245       1245           <NA>       NA               NA
## 145     1       1212       1245           <NA>       NA               NA
## 157     1       1098       1217           <NA>       NA               NA
## 158     2       1122       1217           <NA>       NA               NA
## 173     1       1161       1183 YP_001111369.1    0.274              217
## 194     1        738       1162           <NA>       NA               NA
## 195     2        801       1162           <NA>       NA               NA
## 218     1       1110       1132 YP_002790884.1    0.334              375
## 228     1       1074       1102           <NA>       NA               NA
## 229     2       1098       1102           <NA>       NA               NA
## 260     2        972       1055           <NA>       NA               NA
## 261     1        870       1055           <NA>       NA               NA
## 283     2        765       1018           <NA>       NA               NA
## 284     1        738       1018           <NA>       NA               NA
## 335     2       2844       2876           <NA>       NA               NA
## 336     1       2811       2876           <NA>       NA               NA
## 360     1       1650       2359           <NA>       NA               NA
## 361     2       1674       2359           <NA>       NA               NA
## 367     1       2160       2277 YP_009389548.1    0.205              709
## 373     1       2061       2144 YP_009389548.1    0.202              659
## 378     1       2076       2101 YP_009508276.1    0.313              608
## 419     1       1725       1894    NP_620544.1    0.316              528
## 470     2       1524       1682           <NA>       NA               NA
## 471     1       1500       1682           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 16          NA        NA     NA   NA     NA   NA        NA       NA
## 17          NA        NA     NA   NA     NA   NA        NA       NA
## 42          NA        NA     NA   NA     NA   NA        NA       NA
## 43          NA        NA     NA   NA     NA   NA        NA       NA
## 44          NA        NA     NA   NA     NA   NA        NA       NA
## 45          NA        NA     NA   NA     NA   NA        NA       NA
## 46          NA        NA     NA   NA     NA   NA        NA       NA
## 85         246         0     55  419     50  433 5.282e-48      184
## 88          NA        NA     NA   NA     NA   NA        NA       NA
## 89          NA        NA     NA   NA     NA   NA        NA       NA
## 108        224         0     17  363    442  787 1.810e-66      236
## 131         NA        NA     NA   NA     NA   NA        NA       NA
## 132         NA        NA     NA   NA     NA   NA        NA       NA
## 144         NA        NA     NA   NA     NA   NA        NA       NA
## 145         NA        NA     NA   NA     NA   NA        NA       NA
## 157         NA        NA     NA   NA     NA   NA        NA       NA
## 158         NA        NA     NA   NA     NA   NA        NA       NA
## 173        157         0    143  359    916 1132 2.995e-15       83
## 194         NA        NA     NA   NA     NA   NA        NA       NA
## 195         NA        NA     NA   NA     NA   NA        NA       NA
## 218        240         0      8  368    403  777 1.924e-53      198
## 228         NA        NA     NA   NA     NA   NA        NA       NA
## 229         NA        NA     NA   NA     NA   NA        NA       NA
## 260         NA        NA     NA   NA     NA   NA        NA       NA
## 261         NA        NA     NA   NA     NA   NA        NA       NA
## 283         NA        NA     NA   NA     NA   NA        NA       NA
## 284         NA        NA     NA   NA     NA   NA        NA       NA
## 335         NA        NA     NA   NA     NA   NA        NA       NA
## 336         NA        NA     NA   NA     NA   NA        NA       NA
## 360         NA        NA     NA   NA     NA   NA        NA       NA
## 361         NA        NA     NA   NA     NA   NA        NA       NA
## 367        547         0      8  716    309  997 4.210e-25      118
## 373        509         0      1  659    358  996 8.667e-21      104
## 378        414         0     89  692     20  627 1.050e-79      287
## 419        345         0      1  528    869 1373 1.444e-64      238
## 470         NA        NA     NA   NA     NA   NA        NA       NA
## 471         NA        NA     NA   NA     NA   NA        NA       NA
##                                                            annotation
## 16                                                               <NA>
## 17                                                               <NA>
## 42                                                               <NA>
## 43                                                               <NA>
## 44                                                               <NA>
## 45                                                               <NA>
## 46                                                               <NA>
## 85      putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 88                                                               <NA>
## 89                                                               <NA>
## 108                                polypeptide P5 [Wound tumor virus]
## 131                                                              <NA>
## 132                                                              <NA>
## 144                                                              <NA>
## 145                                                              <NA>
## 157                                                              <NA>
## 158                                                              <NA>
## 173                                   RGDV P2 [Rice gall dwarf virus]
## 194                                                              <NA>
## 195                                                              <NA>
## 218    RNA-directed RNA polymerase [Homalodisca vitripennis reovirus]
## 228                                                              <NA>
## 229                                                              <NA>
## 260                                                              <NA>
## 261                                                              <NA>
## 283                                                              <NA>
## 284                                                              <NA>
## 335                                                              <NA>
## 336                                                              <NA>
## 360                                                              <NA>
## 361                                                              <NA>
## 367 putative major core protein [Aedes camptorhynchus reo-like virus]
## 373 putative major core protein [Aedes camptorhynchus reo-like virus]
## 378                                polypeptide P5 [Wound tumor virus]
## 419                   RNA-dependent RNA polymerase [Rice dwarf virus]
## 470                                                              <NA>
## 471                                                              <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 16    contig_10231_1_1638_+ contig_10231 getorf_JV gene     1 1638     .   TRUE
## 17  contig_10231_205_1638_+ contig_10231 getorf_JV gene   205 1638     .   TRUE
## 42    contig_11299_2_1444_+ contig_11299 getorf_JV gene     2 1444     .   TRUE
## 43    contig_11299_8_1444_+ contig_11299 getorf_JV gene     8 1444     .   TRUE
## 44     contig_11366_1_159_- contig_11366 getorf_JV gene     1  159     .  FALSE
## 45  contig_11366_172_1461_- contig_11366 getorf_JV gene   172 1461     .  FALSE
## 46  contig_11366_172_1485_- contig_11366 getorf_JV gene   172 1485     .  FALSE
## 85    contig_12726_9_1265_+ contig_12726 getorf_JV gene     9 1265     .   TRUE
## 88   contig_12974_68_1387_- contig_12974 getorf_JV gene    68 1387     .  FALSE
## 89   contig_12974_68_1396_- contig_12974 getorf_JV gene    68 1396     .  FALSE
## 108 contig_13248_140_1234_+ contig_13248 getorf_JV gene   140 1234     .   TRUE
## 131   contig_14483_3_1301_+ contig_14483 getorf_JV gene     3 1301     .   TRUE
## 132  contig_14483_39_1301_+ contig_14483 getorf_JV gene    39 1301     .   TRUE
## 144   contig_15565_1_1245_+ contig_15565 getorf_JV gene     1 1245     .   TRUE
## 145  contig_15565_34_1245_+ contig_15565 getorf_JV gene    34 1245     .   TRUE
## 157   contig_16123_3_1100_- contig_16123 getorf_JV gene     3 1100     .  FALSE
## 158   contig_16123_3_1124_- contig_16123 getorf_JV gene     3 1124     .  FALSE
## 173  contig_16886_14_1174_- contig_16886 getorf_JV gene    14 1174     .  FALSE
## 194  contig_17382_160_897_+ contig_17382 getorf_JV gene   160  897     .   TRUE
## 195   contig_17382_97_897_+ contig_17382 getorf_JV gene    97  897     .   TRUE
## 218   contig_18151_3_1112_- contig_18151 getorf_JV gene     3 1112     .  FALSE
## 228  contig_18944_27_1100_+ contig_18944 getorf_JV gene    27 1100     .   TRUE
## 229   contig_18944_3_1100_+ contig_18944 getorf_JV gene     3 1100     .   TRUE
## 260  contig_20468_49_1020_- contig_20468 getorf_JV gene    49 1020     .  FALSE
## 261   contig_20468_49_918_- contig_20468 getorf_JV gene    49  918     .  FALSE
## 283 contig_21782_254_1018_- contig_21782 getorf_JV gene   254 1018     .  FALSE
## 284  contig_21782_254_991_- contig_21782 getorf_JV gene   254  991     .  FALSE
## 335   contig_4706_33_2876_+  contig_4706 getorf_JV gene    33 2876     .   TRUE
## 336   contig_4706_66_2876_+  contig_4706 getorf_JV gene    66 2876     .   TRUE
## 360  contig_6070_558_2207_-  contig_6070 getorf_JV gene   558 2207     .  FALSE
## 361  contig_6070_558_2231_-  contig_6070 getorf_JV gene   558 2231     .  FALSE
## 367   contig_6361_19_2178_+  contig_6361 getorf_JV gene    19 2178     .   TRUE
## 373   contig_6912_73_2133_-  contig_6912 getorf_JV gene    73 2133     .  FALSE
## 378    contig_7089_3_2078_-  contig_7089 getorf_JV gene     3 2078     .  FALSE
## 419   contig_8245_37_1761_-  contig_8245 getorf_JV gene    37 1761     .  FALSE
## 470    contig_9848_1_1524_+  contig_9848 getorf_JV gene     1 1524     .   TRUE
## 471   contig_9848_25_1524_+  contig_9848 getorf_JV gene    25 1524     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 16      2       1638       1638           <NA>       NA               NA
## 17      1       1434       1638           <NA>       NA               NA
## 42      2       1443       1532           <NA>       NA               NA
## 43      1       1437       1532           <NA>       NA               NA
## 44      2        159       1526           <NA>       NA               NA
## 45      1       1290       1526           <NA>       NA               NA
## 46      2       1314       1526           <NA>       NA               NA
## 85      1       1257       1266 YP_001111373.1    0.325              384
## 88      1       1320       1398           <NA>       NA               NA
## 89      2       1329       1398           <NA>       NA               NA
## 108     1       1095       1235 YP_009508276.1    0.350              347
## 131     2       1299       1302           <NA>       NA               NA
## 132     1       1263       1302           <NA>       NA               NA
## 144     2       1245       1245           <NA>       NA               NA
## 145     1       1212       1245           <NA>       NA               NA
## 157     1       1098       1217           <NA>       NA               NA
## 158     2       1122       1217           <NA>       NA               NA
## 173     1       1161       1183 YP_001111369.1    0.274              217
## 194     1        738       1162           <NA>       NA               NA
## 195     2        801       1162           <NA>       NA               NA
## 218     1       1110       1132 YP_002790884.1    0.334              375
## 228     1       1074       1102           <NA>       NA               NA
## 229     2       1098       1102           <NA>       NA               NA
## 260     2        972       1055           <NA>       NA               NA
## 261     1        870       1055           <NA>       NA               NA
## 283     2        765       1018           <NA>       NA               NA
## 284     1        738       1018           <NA>       NA               NA
## 335     2       2844       2876           <NA>       NA               NA
## 336     1       2811       2876           <NA>       NA               NA
## 360     1       1650       2359           <NA>       NA               NA
## 361     2       1674       2359           <NA>       NA               NA
## 367     1       2160       2277 YP_009389548.1    0.205              709
## 373     1       2061       2144 YP_009389548.1    0.202              659
## 378     1       2076       2101 YP_009508276.1    0.313              608
## 419     1       1725       1894    NP_620544.1    0.316              528
## 470     2       1524       1682           <NA>       NA               NA
## 471     1       1500       1682           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 16          NA        NA     NA   NA     NA   NA        NA       NA
## 17          NA        NA     NA   NA     NA   NA        NA       NA
## 42          NA        NA     NA   NA     NA   NA        NA       NA
## 43          NA        NA     NA   NA     NA   NA        NA       NA
## 44          NA        NA     NA   NA     NA   NA        NA       NA
## 45          NA        NA     NA   NA     NA   NA        NA       NA
## 46          NA        NA     NA   NA     NA   NA        NA       NA
## 85         246         0     55  419     50  433 5.282e-48      184
## 88          NA        NA     NA   NA     NA   NA        NA       NA
## 89          NA        NA     NA   NA     NA   NA        NA       NA
## 108        224         0     17  363    442  787 1.810e-66      236
## 131         NA        NA     NA   NA     NA   NA        NA       NA
## 132         NA        NA     NA   NA     NA   NA        NA       NA
## 144         NA        NA     NA   NA     NA   NA        NA       NA
## 145         NA        NA     NA   NA     NA   NA        NA       NA
## 157         NA        NA     NA   NA     NA   NA        NA       NA
## 158         NA        NA     NA   NA     NA   NA        NA       NA
## 173        157         0    143  359    916 1132 2.995e-15       83
## 194         NA        NA     NA   NA     NA   NA        NA       NA
## 195         NA        NA     NA   NA     NA   NA        NA       NA
## 218        240         0      8  368    403  777 1.924e-53      198
## 228         NA        NA     NA   NA     NA   NA        NA       NA
## 229         NA        NA     NA   NA     NA   NA        NA       NA
## 260         NA        NA     NA   NA     NA   NA        NA       NA
## 261         NA        NA     NA   NA     NA   NA        NA       NA
## 283         NA        NA     NA   NA     NA   NA        NA       NA
## 284         NA        NA     NA   NA     NA   NA        NA       NA
## 335         NA        NA     NA   NA     NA   NA        NA       NA
## 336         NA        NA     NA   NA     NA   NA        NA       NA
## 360         NA        NA     NA   NA     NA   NA        NA       NA
## 361         NA        NA     NA   NA     NA   NA        NA       NA
## 367        547         0      8  716    309  997 4.210e-25      118
## 373        509         0      1  659    358  996 8.667e-21      104
## 378        414         0     89  692     20  627 1.050e-79      287
## 419        345         0      1  528    869 1373 1.444e-64      238
## 470         NA        NA     NA   NA     NA   NA        NA       NA
## 471         NA        NA     NA   NA     NA   NA        NA       NA
##                                                            annotation
## 16                                                               <NA>
## 17                                                               <NA>
## 42                                                               <NA>
## 43                                                               <NA>
## 44                                                               <NA>
## 45                                                               <NA>
## 46                                                               <NA>
## 85      putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 88                                                               <NA>
## 89                                                               <NA>
## 108                                polypeptide P5 [Wound tumor virus]
## 131                                                              <NA>
## 132                                                              <NA>
## 144                                                              <NA>
## 145                                                              <NA>
## 157                                                              <NA>
## 158                                                              <NA>
## 173                                   RGDV P2 [Rice gall dwarf virus]
## 194                                                              <NA>
## 195                                                              <NA>
## 218    RNA-directed RNA polymerase [Homalodisca vitripennis reovirus]
## 228                                                              <NA>
## 229                                                              <NA>
## 260                                                              <NA>
## 261                                                              <NA>
## 283                                                              <NA>
## 284                                                              <NA>
## 335                                                              <NA>
## 336                                                              <NA>
## 360                                                              <NA>
## 361                                                              <NA>
## 367 putative major core protein [Aedes camptorhynchus reo-like virus]
## 373 putative major core protein [Aedes camptorhynchus reo-like virus]
## 378                                polypeptide P5 [Wound tumor virus]
## 419                   RNA-dependent RNA polymerase [Rice dwarf virus]
## 470                                                              <NA>
## 471                                                              <NA>

The unannotated contigs show no hits based on blastx against nr. However, each of them contains a nice ORF (with getorf option 0), so we include them:

Rice gall dwarf virus (RGDV), a member of the family Reoviridae, causes repeated epidemics in rice fields in southern China. An RGDV isolate collected from Guangdong Province (southern China) is mainly transmitted by leafhopper vector Recilia dorsalis in a persistent-propagative manner. The infection by RGDV induces the formation of virus-containing tubules in the plant host and insect vector. RGDV is an icosahedral double-layer particle approximately 65–70 nm in diameter, with a 12-segmented dsRNA genome (Moriyasu et al., 2000, 2007; Miyazaki et al., 2005; Zhang et al., 2008). https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3719018/

Same structure for Homalodisca vitripennis reovirus https://www.genome.jp/virushostdb/411854

We got three contigs that span the major part of the RdRp :

YP_001111373.1 is the RdRp from RGDV (1458 aa long). contig_12726 covers amino acids 50 to 433 of YP_001111373.1; contig_18151 covers amino acids 407 to 781 of YP_001111373.1; contig_8245 covers amino acids 870 to 1386 of YP_001111373.1.

Five other contigs encode other proteins.

We merge these three parts of the RdRp and build a phylogeny :

# Draw the tree built from the fused RdRp fragments and clamp the x axis
# to 0-3 in the same expression.
p <- plot_phylogeny(
  "../phylogenies/contig_8245_37_1761_-_fused.tree",
  taxo_info = wta_taxo_info
) + xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# No width/height is given, so ggsave() chooses the size itself
# (reported as 7 x 5 in the message below).
ggsave(
  filename = "../phylogenies/contig_8245_37_1761_-_fused.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Auto-print the plot object so it shows up in the report.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Hubei virga-like virus 9 in Trichopria sp.

Define the corresponding contigs :

# Contig attributed to the virga-like virus found in Trichopria sp.
contig_set <- paste0("contig_", 923)
# NA presumably means "no unannotated contig attached" -- confirm against
# plot_orfs().
contig_set_unassigned <- NA
# Keep both sets under this virus' key so the matching rows can be fused later.
virus_list[["Virga_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Pass the two contig sets, the gff_wta2 annotation object and a label to
# plot_orfs() (defined elsewhere); the result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Virga_Tricho"
)
##                    orf_name      seqid    source type start   end score strand
## 438  contig_923_1133_1591_- contig_923 getorf_JV gene  1133  1591     .  FALSE
## 439 contig_923_1627_10098_- contig_923 getorf_JV gene  1627 10098     .  FALSE
## 440    contig_923_282_659_- contig_923 getorf_JV gene   282   659     .  FALSE
## 441   contig_923_694_1101_- contig_923 getorf_JV gene   694  1101     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 438     1        459      10210           <NA>       NA               NA
## 439     1       8472      10210 YP_009337659.1    0.261             2633
## 440     1        378      10210           <NA>       NA               NA
## 441     1        408      10210 YP_009388489.1    0.310              115
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 438         NA        NA     NA   NA     NA   NA         NA       NA
## 439       1670         0    188 2820    179 2439 5.156e-176      619
## 440         NA        NA     NA   NA     NA   NA         NA       NA
## 441         75         0      7  116     17  131  6.048e-04       44
##                                           annotation
## 438                                             <NA>
## 439 hypothetical protein [Hubei virga-like virus 21]
## 440                                             <NA>
## 441                      ORF4 [Lake Sinai Virus SA1]
## Saving 7 x 5 in image
# Show the first element of the plot_orfs() result, still wrapped in a
# one-element list (presumably the ORF figure, which is not visible in this
# text rendering -- confirm).
res[1]
## [[1]]

# Show the second element of the result, again as a one-element list; the
# echoed output that follows is the ORF / BLAST hit table.
res[2]
## [[1]]
##                    orf_name      seqid    source type start   end score strand
## 438  contig_923_1133_1591_- contig_923 getorf_JV gene  1133  1591     .  FALSE
## 439 contig_923_1627_10098_- contig_923 getorf_JV gene  1627 10098     .  FALSE
## 440    contig_923_282_659_- contig_923 getorf_JV gene   282   659     .  FALSE
## 441   contig_923_694_1101_- contig_923 getorf_JV gene   694  1101     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 438     1        459      10210           <NA>       NA               NA
## 439     1       8472      10210 YP_009337659.1    0.261             2633
## 440     1        378      10210           <NA>       NA               NA
## 441     1        408      10210 YP_009388489.1    0.310              115
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 438         NA        NA     NA   NA     NA   NA         NA       NA
## 439       1670         0    188 2820    179 2439 5.156e-176      619
## 440         NA        NA     NA   NA     NA   NA         NA       NA
## 441         75         0      7  116     17  131  6.048e-04       44
##                                           annotation
## 438                                             <NA>
## 439 hypothetical protein [Hubei virga-like virus 21]
## 440                                             <NA>
## 441                      ORF4 [Lake Sinai Virus SA1]

See Kondo et al. 2019.

Virga-like viruses are non-segmented +ssRNA viruses with genomes of up to 10 kb. It looks like our assembly is complete.

contig_923_1627_10098 shows the expected domains (see fig. 2 of Kondo et al. 2019):

domains found in contig_923, third ORF

We built a phylogeny based on RdRP only :

# Tree for the contig_923_1627_10098_- ORF and its homologs (PhyML tree file),
# annotated with wta_taxo_info.
p <- plot_phylogeny(
  file = "../phylogenies/contig_923_1627_10098_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Disabled x-axis adjustment, kept for reference:
# p <- p + xlim(0, 15)
# Save the figure with an explicit width and height of 8.
ggsave(
  filename = "../phylogenies/contig_923_1627_10098_-.pdf",
  plot = p,
  width = 8,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Auto-print the plot object so it shows up in the report.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

It clusters with viruses associated with several Drosophila species: Beult virus is a positive-strand RNA virus described from D. suzukii by Medd et al. 2018; Bofa virus and Buckhurst virus are viruses of D. melanogaster and D. obscura, respectively (Webster et al. 2016).

Wuhan mivirus-like in Trichopria

Define the corresponding contigs :

# Contigs with a chuvirus-like RdRp hit, plus two contigs added without a hit.
contig_set <- paste0("contig_", c(22765, 13828))
contig_set_unassigned <- paste0("contig_", c(10503, 10178))
# Keep both sets under this virus' key so the matching rows can be fused later.
virus_list[["Chuviridae4_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# NOTE(review): the label passed as `name` ("Chuvirus2_tricho") differs from
# the virus_list key above ("Chuviridae4_Tricho") -- confirm which is intended.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Chuvirus2_tricho"
)
##                    orf_name        seqid    source type start  end score strand
## 14  contig_10178_129_1604_- contig_10178 getorf_JV gene   129 1604     .  FALSE
## 15  contig_10178_129_1631_- contig_10178 getorf_JV gene   129 1631     .  FALSE
## 21    contig_10503_2_1444_+ contig_10503 getorf_JV gene     2 1444     .   TRUE
## 22    contig_10503_8_1444_+ contig_10503 getorf_JV gene     8 1444     .   TRUE
## 120  contig_13828_62_1273_- contig_13828 getorf_JV gene    62 1273     .  FALSE
## 298   contig_22765_27_590_+ contig_22765 getorf_JV gene    27  590     .   TRUE
## 299 contig_22765_642_5144_+ contig_22765 getorf_JV gene   642 5144     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 14      1       1476       1643           <NA>       NA               NA
## 15      2       1503       1643           <NA>       NA               NA
## 21      2       1443       1610           <NA>       NA               NA
## 22      1       1437       1610           <NA>       NA               NA
## 120     1       1212       1340 YP_009337904.1    0.284              369
## 298     1        564       5144           <NA>       NA               NA
## 299     1       4503       5144 YP_009337089.1    0.407             1308
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 14          NA        NA     NA   NA     NA   NA        NA       NA
## 15          NA        NA     NA   NA     NA   NA        NA       NA
## 21          NA        NA     NA   NA     NA   NA        NA       NA
## 22          NA        NA     NA   NA     NA   NA        NA       NA
## 120        263         0      1  369   1791 2158 9.707e-41      162
## 298         NA        NA     NA   NA     NA   NA        NA       NA
## 299        773         0      8 1312    210 1517 0.000e+00     1041
##                                                     annotation
## 14                                                        <NA>
## 15                                                        <NA>
## 21                                                        <NA>
## 22                                                        <NA>
## 120 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]
## 298                                                       <NA>
## 299 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
## Saving 7 x 5 in image
# Show the first element of the plot_orfs() result, still wrapped in a
# one-element list (presumably the ORF figure -- confirm).
res[1]
## [[1]]

contig_10503 and contig_10178 have no hits but contain nice ORFs and are thus included.

# Show the second element of the plot_orfs() result as a one-element list; the
# echoed output that follows is the ORF / BLAST hit table.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 14  contig_10178_129_1604_- contig_10178 getorf_JV gene   129 1604     .  FALSE
## 15  contig_10178_129_1631_- contig_10178 getorf_JV gene   129 1631     .  FALSE
## 21    contig_10503_2_1444_+ contig_10503 getorf_JV gene     2 1444     .   TRUE
## 22    contig_10503_8_1444_+ contig_10503 getorf_JV gene     8 1444     .   TRUE
## 120  contig_13828_62_1273_- contig_13828 getorf_JV gene    62 1273     .  FALSE
## 298   contig_22765_27_590_+ contig_22765 getorf_JV gene    27  590     .   TRUE
## 299 contig_22765_642_5144_+ contig_22765 getorf_JV gene   642 5144     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 14      1       1476       1643           <NA>       NA               NA
## 15      2       1503       1643           <NA>       NA               NA
## 21      2       1443       1610           <NA>       NA               NA
## 22      1       1437       1610           <NA>       NA               NA
## 120     1       1212       1340 YP_009337904.1    0.284              369
## 298     1        564       5144           <NA>       NA               NA
## 299     1       4503       5144 YP_009337089.1    0.407             1308
##     mismatches gap_opens qstart qend sstart send    evalue bitscore
## 14          NA        NA     NA   NA     NA   NA        NA       NA
## 15          NA        NA     NA   NA     NA   NA        NA       NA
## 21          NA        NA     NA   NA     NA   NA        NA       NA
## 22          NA        NA     NA   NA     NA   NA        NA       NA
## 120        263         0      1  369   1791 2158 9.707e-41      162
## 298         NA        NA     NA   NA     NA   NA        NA       NA
## 299        773         0      8 1312    210 1517 0.000e+00     1041
##                                                     annotation
## 14                                                        <NA>
## 15                                                        <NA>
## 21                                                        <NA>
## 22                                                        <NA>
## 120 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]
## 298                                                       <NA>
## 299 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]

The major part of the RdRp is covered, but is split between two contigs :

contig_22765 covers protein YP_009337904.1 (RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1], 2172 aa long) from position 201 to 1698; contig_13828_62_1273_- covers protein YP_009337904.1 (RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]) from position 1791 to 2158.

Hubei chuvirus-like virus 1 is composed of two segments of 6873 bp and 3958 bp.

pfam00946, Mononegavirales RNA dependent RNA polymerase: Members of the Mononegavirales including the Paramyxoviridae, like other non-segmented negative strand RNA viruses, have an RNA-dependent RNA polymerase composed of two subunits, a large protein L and a phosphoprotein P. This is a protein family of the L protein. The L protein confers the RNA polymerase activity on the complex. The P protein acts as a transcription factor.

The P-protein is lacking in our dataset? Partial genome.

We built a phylogeny based on RdRP domain only :

# Tree for the contig_22765_642_5144_+ ORF and its homologs (BioNJ tree file),
# with the x axis clamped to 0-1.5 in the same expression.
p <- plot_phylogeny(
  file = "../phylogenies/contig_22765_642_5144_+_with_homologs-BioNJ_tree",
  taxo_info = wta_taxo_info
) + xlim(0, 1.5)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# No width/height is given, so ggsave() chooses the size itself
# (reported as 7 x 5 in the message below).
ggsave(
  filename = "../phylogenies/contig_22765_642_5144_+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Auto-print the plot object so it shows up in the report.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Wuhan insect virus 22 like in Trichopria

Define the corresponding contigs :

# Contigs whose ORFs hit Wuhan insect virus 22 proteins.
contig_set <- paste0("contig_", c(8703, 6917, 9411))
# NA presumably means "no unannotated contig attached" -- confirm against
# plot_orfs().
contig_set_unassigned <- NA
# Keep both sets under this virus' key so the matching rows can be fused later.
virus_list[["Partiti-like4_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Pass the two contig sets, the gff_wta2 annotation object and a label to
# plot_orfs() (defined elsewhere); the result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Partiti-like4_Tricho"
)
##                   orf_name       seqid    source type start  end score strand
## 374 contig_6917_680_2107_- contig_6917 getorf_JV gene   680 2107     .  FALSE
## 422  contig_8703_47_1768_+ contig_8703 getorf_JV gene    47 1768     .   TRUE
## 454  contig_9411_62_1708_- contig_9411 getorf_JV gene    62 1708     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 374     1       1428       2143 YP_009346040.1    0.380              350
## 422     1       1722       1827 YP_009346039.1    0.488              538
## 454     1       1647       1737 YP_009346040.1    0.390              398
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 374        216         0     99  448    124  473  1.201e-60      223
## 422        274         0     18  554     23  560 1.014e-167      540
## 454        237         0    136  533    112  501  7.746e-79      280
##                                       annotation
## 374 hypothetical protein [Wuhan insect virus 22]
## 422                 RdRp [Wuhan insect virus 22]
## 454 hypothetical protein [Wuhan insect virus 22]
## Saving 7 x 5 in image
# Show the first element of the plot_orfs() result, still wrapped in a
# one-element list (presumably the ORF figure, which is not visible in this
# text rendering -- confirm).
res[1]
## [[1]]

# Show the second element of the result, again as a one-element list; the
# echoed output that follows is the ORF / BLAST hit table.
res[2]
## [[1]]
##                   orf_name       seqid    source type start  end score strand
## 374 contig_6917_680_2107_- contig_6917 getorf_JV gene   680 2107     .  FALSE
## 422  contig_8703_47_1768_+ contig_8703 getorf_JV gene    47 1768     .   TRUE
## 454  contig_9411_62_1708_- contig_9411 getorf_JV gene    62 1708     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 374     1       1428       2143 YP_009346040.1    0.380              350
## 422     1       1722       1827 YP_009346039.1    0.488              538
## 454     1       1647       1737 YP_009346040.1    0.390              398
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 374        216         0     99  448    124  473  1.201e-60      223
## 422        274         0     18  554     23  560 1.014e-167      540
## 454        237         0    136  533    112  501  7.746e-79      280
##                                       annotation
## 374 hypothetical protein [Wuhan insect virus 22]
## 422                 RdRp [Wuhan insect virus 22]
## 454 hypothetical protein [Wuhan insect virus 22]

Wuhan insect virus 22 is composed of two segments: a 1869 bp segment encoding the RdRp and a 1766 bp segment encoding a hypothetical protein. It is unclear whether all three contigs belong to the same virus or whether only one of the two “hypothetical protein”-encoding contigs does.

We built a phylogeny based on RdRP :

# Tree for the contig_8703_47_1768_+ ORF and its homologs (PhyML tree file),
# with the x axis clamped to 0-4 in the same expression.
p <- plot_phylogeny(
  file = "../phylogenies/contig_8703_47_1768_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) + xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# No width/height is given, so ggsave() chooses the size itself
# (reported as 7 x 5 in the message below).
ggsave(
  filename = "../phylogenies/contig_8703_47_1768_+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Auto-print the plot object so it shows up in the report.
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Proteins contig_6917_680_2107 and contig_9411_62_1708_- have lots of eukaryotic hits… Check that! contig_6917_680_2107_nr.fa: same thing…

contig 9411, HGT ?

Hubei dimarhabdovirus like in Trichopria sp.

Define the corresponding contigs :

# Contigs attributed to the dimarhabdovirus-like virus of Trichopria sp.
contig_set <- paste0("contig_", c(5571, 3788, 11939, 20619, 10949))
# NA presumably means "no unannotated contig attached" -- confirm against
# plot_orfs().
contig_set_unassigned <- NA
# Keep both sets under this virus' key so the matching rows can be fused later.
virus_list[["Rhabdoviridae1_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Pass the two contig sets, the gff_wta2 annotation object and a label to
# plot_orfs() (defined elsewhere); the result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Rhabdoviridae1_Tricho"
)
##                    orf_name        seqid    source type start  end score strand
## 28   contig_10949_44_1561_+ contig_10949 getorf_JV gene    44 1561     .   TRUE
## 65    contig_11939_3_1463_- contig_11939 getorf_JV gene     3 1463     .  FALSE
## 262   contig_20619_1_1005_- contig_20619 getorf_JV gene     1 1005     .  FALSE
## 332   contig_3788_54_3266_+  contig_3788 getorf_JV gene    54 3266     .   TRUE
## 350 contig_5571_1198_1908_+  contig_5571 getorf_JV gene  1198 1908     .   TRUE
## 351 contig_5571_2006_2527_+  contig_5571 getorf_JV gene  2006 2527     .   TRUE
## 352  contig_5571_283_1155_+  contig_5571 getorf_JV gene   283 1155     .   TRUE
## 353    contig_5571_91_246_+  contig_5571 getorf_JV gene    91  246     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 28      1       1518       1563 YP_009337071.1    0.803              505
## 65      1       1461       1478 YP_009337071.1    0.577              487
## 262     1       1005       1051 YP_009337067.1    0.556              329
## 332     1       3213       3390 YP_009301743.1    0.525             1050
## 350     1        711       2527 YP_009337069.1    0.486              220
## 351     1        522       2527 YP_009337070.1    0.443              166
## 352     1        873       2527 YP_009337068.1    0.256              288
## 353     1        156       2527 YP_009337067.1    0.529               51
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 28          99         0      2  506    531 1035 6.814e-278      855
## 65         206         0      1  487     11  497 6.503e-195      614
## 262        146         0      1  329      1  329 3.026e-119      387
## 332        498         0      1 1050   1049 2098  0.000e+00     1120
## 350        109         0     17  236     11  223  2.022e-63      221
## 351         91         0      4  168      7  172  1.349e-38      146
## 352        202         0      1  288      1  272  2.934e-08       60
## 353         24         0      2   52    370  420  7.671e-12       62
##                                                       annotation
## 28  RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 65  RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 262       putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 332          RNA-dependent RNA polymerase [Wuhan Insect virus 7]
## 350       hypothetical protein 3 [Hubei dimarhabdovirus virus 2]
## 351        putative glycoprotein [Hubei dimarhabdovirus virus 2]
## 352       hypothetical protein 2 [Hubei dimarhabdovirus virus 2]
## 353       putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 28   contig_10949_44_1561_+ contig_10949 getorf_JV gene    44 1561     .   TRUE
## 65    contig_11939_3_1463_- contig_11939 getorf_JV gene     3 1463     .  FALSE
## 262   contig_20619_1_1005_- contig_20619 getorf_JV gene     1 1005     .  FALSE
## 332   contig_3788_54_3266_+  contig_3788 getorf_JV gene    54 3266     .   TRUE
## 350 contig_5571_1198_1908_+  contig_5571 getorf_JV gene  1198 1908     .   TRUE
## 351 contig_5571_2006_2527_+  contig_5571 getorf_JV gene  2006 2527     .   TRUE
## 352  contig_5571_283_1155_+  contig_5571 getorf_JV gene   283 1155     .   TRUE
## 353    contig_5571_91_246_+  contig_5571 getorf_JV gene    91  246     .   TRUE
##     phase attributes seq_length     subject_id identity alignment_length
## 28      1       1518       1563 YP_009337071.1    0.803              505
## 65      1       1461       1478 YP_009337071.1    0.577              487
## 262     1       1005       1051 YP_009337067.1    0.556              329
## 332     1       3213       3390 YP_009301743.1    0.525             1050
## 350     1        711       2527 YP_009337069.1    0.486              220
## 351     1        522       2527 YP_009337070.1    0.443              166
## 352     1        873       2527 YP_009337068.1    0.256              288
## 353     1        156       2527 YP_009337067.1    0.529               51
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 28          99         0      2  506    531 1035 6.814e-278      855
## 65         206         0      1  487     11  497 6.503e-195      614
## 262        146         0      1  329      1  329 3.026e-119      387
## 332        498         0      1 1050   1049 2098  0.000e+00     1120
## 350        109         0     17  236     11  223  2.022e-63      221
## 351         91         0      4  168      7  172  1.349e-38      146
## 352        202         0      1  288      1  272  2.934e-08       60
## 353         24         0      2   52    370  420  7.671e-12       62
##                                                       annotation
## 28  RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 65  RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 262       putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 332          RNA-dependent RNA polymerase [Wuhan Insect virus 7]
## 350       hypothetical protein 3 [Hubei dimarhabdovirus virus 2]
## 351        putative glycoprotein [Hubei dimarhabdovirus virus 2]
## 352       hypothetical protein 2 [Hubei dimarhabdovirus virus 2]
## 353       putative nucleoprotein [Hubei dimarhabdovirus virus 2]

Hubei dimarhabdovirus 2 is a non-segmented virus of 11332 bp.

https://www.genome.jp/dbget-bin/www_bget?refseq:NC_033006

Our assembly is thus fragmented. However it seems to cover at least most of the RdRp:

YP_009337071.1 is the RdRp from Hubei dimarhabdovirus 2. It has 2119 AA. Contig 11939 encodes a protein that aligns with YP_009337071.1 from 11 to 497 (contains an RdRp domain). Contig 10949 encodes a protein that aligns with YP_009337071.1 from 531 to 1035 (contains an RdRp domain). contig_3788_54_3266_+ encodes a protein that aligns with YP_009337071.1 from 1048 to 2119 (contains an mRNA capping region and a viral-capping methyltransferase).

Contig 11939 domains Contig 10949 domains Contig 3788 domains

We built a phylogeny based on RdRP domain (orfs encoded by contigs 11939 and 10949 only):

# Build the tree figure from the PhyML tree file of the fused contig_3788
# alignment, passing the taxonomy table as taxo_info.
p <- plot_phylogeny(
  file = "../phylogenies/contig_3788_54_3266_+_FUSED-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Restrict the x axis to 0-2; this replaces the x scale already set on p.
p <- p + xlim(0, 2)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the figure as a PDF.
# NOTE(review): the file name ends in "_." - a strand sign may have been
# dropped before ".pdf"; confirm this is intended.
ggsave(plot = p, filename = "../phylogenies/contig_3788_54_3266_.pdf")
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Chaq virus

Chaq virus was first described in Webster et al., PLoS Biology (2015), from small RNA sequencing. They write: “Neither Chaq Virus nor Galbut_virus_D.mel Virus has high sequence similarity to known viruses, but both also cluster with invertebrate transcriptome-derived sequences. These may represent new virus lineages, or be weakly conserved genes in a known virus group”.

For the phylogeny, we included the sequences obtained from D. simulans (contig_7817). We also found sequences related to Chaq virus in D. subobscura/D. obscura (contig_15880).

The alignments revealed two parts: an N-terminal part common to all proteins, and a C-terminal part specific to some sequences. After alignment, only the common N-terminal part was used for phylogenetic reconstruction.

# Build the tree figure from the PhyML tree file of the contig_13219 ORF and
# its homologs (tree file given as the first, unnamed argument).
p <- plot_phylogeny(
  "../phylogenies/contig_13219_258_1214_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Restrict the x axis to 0-1; this replaces the x scale already set on p.
p <- p + xlim(0, 1)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the figure as a PDF.
ggsave(plot = p, filename = "../phylogenies/contig_13219_258_1214.pdf")
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Chaq virus was always associated with Galbut_virus_D.mel in D. mel and D. sim, as observed by Shi et al. 2018 (Proc B), whereas it was associated with Vera-Hubei diptera virus 17 in D. sub/D. obs.

Black queen cell virus-like in Pachycrepoideus sp.

Wuhan arthropod virus 2 is related to Black queen cell virus, which suggests that this set of contigs may belong to the same genome (Shi et al. 2016 Nature). See fig S15.

Define the corresponding contigs :

# Contigs attributed to this virus; no contig is left unassigned (NA).
contig_set <- c("contig_7819", "contig_9727", "contig_22845")
contig_set_unassigned <- NA
# Keep both sets in virus_list under this virus' key, for the later fusion of
# the corresponding lines.
virus_list$Dicistroviridae_Pachy <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Call plot_orfs on these contigs with the gff_wta2 annotations; res[1] and
# res[2] are displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Dicistroviridae_Pachy"
)
##                    orf_name        seqid    source type start  end score strand
## 304 contig_22845_207_4232_+ contig_22845 getorf_JV gene   207 4232     .   TRUE
## 396 contig_7819_1403_1555_+  contig_7819 getorf_JV gene  1403 1555     .   TRUE
## 397    contig_7819_2_1354_-  contig_7819 getorf_JV gene     2 1354     .  FALSE
## 468    contig_9727_2_1339_-  contig_9727 getorf_JV gene     2 1339     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 304     1       4026       4457    NP_620565.1    0.398              834
## 396     1        153       1961           <NA>       NA               NA
## 397     1       1353       1961 YP_009342286.1    0.217              335
## 468     1       1338       1696    NP_620564.1    0.411              459
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 304        471         0    469 1302     20  802 3.251e-157      539
## 396         NA        NA     NA   NA     NA   NA         NA       NA
## 397        248         0    113  447    135  452  6.716e-05       51
## 468        257         0      3  440    872 1330 9.803e-103      345
##                                             annotation
## 304    structural polyprotein [Black queen cell virus]
## 396                                               <NA>
## 397   hypothetical protein 1 [Wuhan arthropod virus 2]
## 468 nonstructural polyprotein [Black queen cell virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 304 contig_22845_207_4232_+ contig_22845 getorf_JV gene   207 4232     .   TRUE
## 396 contig_7819_1403_1555_+  contig_7819 getorf_JV gene  1403 1555     .   TRUE
## 397    contig_7819_2_1354_-  contig_7819 getorf_JV gene     2 1354     .  FALSE
## 468    contig_9727_2_1339_-  contig_9727 getorf_JV gene     2 1339     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 304     1       4026       4457    NP_620565.1    0.398              834
## 396     1        153       1961           <NA>       NA               NA
## 397     1       1353       1961 YP_009342286.1    0.217              335
## 468     1       1338       1696    NP_620564.1    0.411              459
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 304        471         0    469 1302     20  802 3.251e-157      539
## 396         NA        NA     NA   NA     NA   NA         NA       NA
## 397        248         0    113  447    135  452  6.716e-05       51
## 468        257         0      3  440    872 1330 9.803e-103      345
##                                             annotation
## 304    structural polyprotein [Black queen cell virus]
## 396                                               <NA>
## 397   hypothetical protein 1 [Wuhan arthropod virus 2]
## 468 nonstructural polyprotein [Black queen cell virus]
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 304 contig_22845_207_4232_+ contig_22845 getorf_JV gene   207 4232     .   TRUE
## 396 contig_7819_1403_1555_+  contig_7819 getorf_JV gene  1403 1555     .   TRUE
## 397    contig_7819_2_1354_-  contig_7819 getorf_JV gene     2 1354     .  FALSE
## 468    contig_9727_2_1339_-  contig_9727 getorf_JV gene     2 1339     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 304     1       4026       4457    NP_620565.1    0.398              834
## 396     1        153       1961           <NA>       NA               NA
## 397     1       1353       1961 YP_009342286.1    0.217              335
## 468     1       1338       1696    NP_620564.1    0.411              459
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 304        471         0    469 1302     20  802 3.251e-157      539
## 396         NA        NA     NA   NA     NA   NA         NA       NA
## 397        248         0    113  447    135  452  6.716e-05       51
## 468        257         0      3  440    872 1330 9.803e-103      345
##                                             annotation
## 304    structural polyprotein [Black queen cell virus]
## 396                                               <NA>
## 397   hypothetical protein 1 [Wuhan arthropod virus 2]
## 468 nonstructural polyprotein [Black queen cell virus]

domains in contig_22845

domains in contig_9727

No conserved domain was detected in contig_7819.

We built a phylogeny based on RdRp domain only of contig22845:

# Build the tree figure from the PhyML tree file of the contig_22845 ORF and
# its homologs, passing the taxonomy table as taxo_info.
p <- plot_phylogeny(
  file = "../phylogenies/contig_22845_207_4232_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Restrict the x axis to 0-30; this replaces the x scale already set on p.
p <- p + xlim(0, 30)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the figure as a 12 x 12 PDF (explicit size instead of the device default).
ggsave(
  plot = p,
  filename = "../phylogenies/contig_22845.pdf",
  width = 12,
  height = 12
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

We built a phylogeny based on RdRp domain only of contig9727:

# Build the tree figure from the PhyML tree file of the contig_9727 ORF and
# its homologs, passing the taxonomy table as taxo_info.
p <- plot_phylogeny(
  file = "../phylogenies/contig_9727_2_1339_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# The x-axis override is disabled for this tree:
# p = p + xlim(0,30)
# Save the figure as a PDF.
ggsave(plot = p, filename = "../phylogenies/contig_9727.pdf")
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Hubei_tetragnatha_maxillosa_virus_8, Wuhan_cricket_virus_2 in Pachycrepoideus sp.

They are segmented viruses (4 segments for Hubei_tetragnatha_maxillosa_virus_8 and 6 segments for Wuhan_cricket_virus_2; see fig S31). Both viruses belong to the same clade (see fig S11, Shi et al. 2016). It is unclear whether contig 15227 is part of this genome or of the Black queen cell virus-like one.

Define the corresponding contigs :

# Contigs attributed to this virus, plus one contig whose assignment is
# still open.
contig_set <- c("contig_14174", "contig_13124")
contig_set_unassigned <- "contig_15227"
# Keep both sets in virus_list under this virus' key, for the later fusion of
# the corresponding lines.
virus_list$`Partiti-like3_Pachy` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)


# Call plot_orfs on these contigs with the gff_wta2 annotations; res[1] and
# res[2] are displayed below.
# NOTE(review): the name passed here differs from the virus_list key
# ("Partiti-like3_Pachy"); confirm the mismatch is intended.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Hubei_tetragnatha_virus_Pachy"
)
##                   orf_name        seqid    source type start  end score strand
## 95  contig_13124_62_1330_- contig_13124 getorf_JV gene    62 1330     .  FALSE
## 127  contig_14174_3_1223_- contig_14174 getorf_JV gene     3 1223     .  FALSE
## 142  contig_15227_3_1181_- contig_15227 getorf_JV gene     3 1181     .  FALSE
## 143  contig_15227_3_1196_- contig_15227 getorf_JV gene     3 1196     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 95      1       1269       1389 YP_009345136.1    0.321              350
## 127     1       1221       1320 YP_009337885.1    0.522              404
## 142     1       1179       1262           <NA>       NA               NA
## 143     2       1194       1262           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 95         215         0      1  350     49  365  5.005e-47      181
## 127        193         0      4  407      9  412 5.523e-138      445
## 142         NA        NA     NA   NA     NA   NA         NA       NA
## 143         NA        NA     NA   NA     NA   NA         NA       NA
##                                       annotation
## 95  hypothetical protein [Wuhan cricket virus 2]
## 127   RdRp [Hubei tetragnatha maxillosa virus 8]
## 142                                         <NA>
## 143                                         <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                   orf_name        seqid    source type start  end score strand
## 95  contig_13124_62_1330_- contig_13124 getorf_JV gene    62 1330     .  FALSE
## 127  contig_14174_3_1223_- contig_14174 getorf_JV gene     3 1223     .  FALSE
## 142  contig_15227_3_1181_- contig_15227 getorf_JV gene     3 1181     .  FALSE
## 143  contig_15227_3_1196_- contig_15227 getorf_JV gene     3 1196     .  FALSE
##     phase attributes seq_length     subject_id identity alignment_length
## 95      1       1269       1389 YP_009345136.1    0.321              350
## 127     1       1221       1320 YP_009337885.1    0.522              404
## 142     1       1179       1262           <NA>       NA               NA
## 143     2       1194       1262           <NA>       NA               NA
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 95         215         0      1  350     49  365  5.005e-47      181
## 127        193         0      4  407      9  412 5.523e-138      445
## 142         NA        NA     NA   NA     NA   NA         NA       NA
## 143         NA        NA     NA   NA     NA   NA         NA       NA
##                                       annotation
## 95  hypothetical protein [Wuhan cricket virus 2]
## 127   RdRp [Hubei tetragnatha maxillosa virus 8]
## 142                                         <NA>
## 143                                         <NA>

The unannotated contig shows no hits based on blastx against nr. However, it has a nice ORF (with getorf option 0). Include it.

It is unclear whether these three contigs belong to the same genome. However, both Wuhan cricket virus 2 (the closest relative to contig_13124) and Hubei_tetragnatha_maxillosa_virus_8 (the closest relative to contig_14174) are segmented viruses:

https://www.genome.jp/virushostdb/1923697 (6 segments)

https://www.genome.jp/virushostdb/1923250 (4 segments)

Belong to Partiti-Picobirna. See fig 4 Shi et al. 

see : Unprecedented genomic diversity of RNA viruses in arthropods reveals the ancestry of negative-sense RNA viruses. Elife. 2015 Jan 29;4.

We built a phylogeny based on RdRp gene :

# Build the tree figure from the PhyML tree file of the contig_14174 ORF and
# its homologs, passing the taxonomy table as taxo_info.
p <- plot_phylogeny(
  file = "../phylogenies/contig_14174_3_1223_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Restrict the x axis to 0-3; this replaces the x scale already set on p.
p <- p + xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the figure as a PDF.
ggsave(plot = p, filename = "../phylogenies/contig_14174_3_1223_-.pdf")
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

Known viruses

Eccles virus in D. subobscura

Define the corresponding contigs :

# Contigs attributed to this virus; no contig is left unassigned (NA).
contig_set <- paste0(
  "contig_",
  c("19093", "12071", "23064", "8887", "6006", "3164", "5214")
)
contig_set_unassigned <- NA
# Keep both sets in virus_list under this virus' key, for the later fusion of
# the corresponding lines.
virus_list$Eccles_virus_D.sub <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Call plot_orfs on these contigs with the gff_wta2 annotations; res[1] and
# res[2] are displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Eccles_virus_D.sub"
)
##                   orf_name        seqid    source type start  end score strand
## 66  contig_12071_65_1192_- contig_12071 getorf_JV gene    65 1192     .  FALSE
## 230   contig_19093_2_946_- contig_19093 getorf_JV gene     2  946     .  FALSE
## 309 contig_23064_38_2905_+ contig_23064 getorf_JV gene    38 2905     .   TRUE
## 327   contig_3164_1_3798_-  contig_3164 getorf_JV gene     1 3798     .  FALSE
## 345  contig_5214_65_2611_+  contig_5214 getorf_JV gene    65 2611     .   TRUE
## 358   contig_6006_4_2379_+  contig_6006 getorf_JV gene     4 2379     .   TRUE
## 428   contig_8887_1_1398_-  contig_8887 getorf_JV gene     1 1398     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 66      1       1128       1468 AWA82242.1    1.000              376          0
## 230     1        945       1098 AWA82237.1    0.765              316         74
## 309     1       2868       2915 AWA82240.1    1.000              956          0
## 327     1       3798       3909 AWA82238.1    0.998              803          2
## 345     1       2547       2671 AWA82239.1    0.967              849         28
## 358     1       2376       2380 AWA82237.1    0.998              792          2
## 428     1       1398       1517 AWA82241.1    0.993              466          3
##     gap_opens qstart qend sstart send     evalue bitscore
## 66          0      1  376    100  475 7.604e-247      758
## 230         0      1  315    130  445 4.258e-147      467
## 309         0      1  956    126 1081  0.000e+00     1917
## 327         0    320 1122      1  803  0.000e+00     1625
## 345         0      1  849    192 1040  0.000e+00     1639
## 358         0      1  792    543 1334  0.000e+00     1572
## 428         0      1  466    117  582 4.160e-311      949
##                                                        annotation
## 66                            hypothetical protein [Eccles virus]
## 230 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 309                           hypothetical protein [Eccles virus]
## 327                  hypothetical protein, partial [Eccles virus]
## 345                           hypothetical protein [Eccles virus]
## 358 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 428                           hypothetical protein [Eccles virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                   orf_name        seqid    source type start  end score strand
## 66  contig_12071_65_1192_- contig_12071 getorf_JV gene    65 1192     .  FALSE
## 230   contig_19093_2_946_- contig_19093 getorf_JV gene     2  946     .  FALSE
## 309 contig_23064_38_2905_+ contig_23064 getorf_JV gene    38 2905     .   TRUE
## 327   contig_3164_1_3798_-  contig_3164 getorf_JV gene     1 3798     .  FALSE
## 345  contig_5214_65_2611_+  contig_5214 getorf_JV gene    65 2611     .   TRUE
## 358   contig_6006_4_2379_+  contig_6006 getorf_JV gene     4 2379     .   TRUE
## 428   contig_8887_1_1398_-  contig_8887 getorf_JV gene     1 1398     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 66      1       1128       1468 AWA82242.1    1.000              376          0
## 230     1        945       1098 AWA82237.1    0.765              316         74
## 309     1       2868       2915 AWA82240.1    1.000              956          0
## 327     1       3798       3909 AWA82238.1    0.998              803          2
## 345     1       2547       2671 AWA82239.1    0.967              849         28
## 358     1       2376       2380 AWA82237.1    0.998              792          2
## 428     1       1398       1517 AWA82241.1    0.993              466          3
##     gap_opens qstart qend sstart send     evalue bitscore
## 66          0      1  376    100  475 7.604e-247      758
## 230         0      1  315    130  445 4.258e-147      467
## 309         0      1  956    126 1081  0.000e+00     1917
## 327         0    320 1122      1  803  0.000e+00     1625
## 345         0      1  849    192 1040  0.000e+00     1639
## 358         0      1  792    543 1334  0.000e+00     1572
## 428         0      1  466    117  582 4.160e-311      949
##                                                        annotation
## 66                            hypothetical protein [Eccles virus]
## 230 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 309                           hypothetical protein [Eccles virus]
## 327                  hypothetical protein, partial [Eccles virus]
## 345                           hypothetical protein [Eccles virus]
## 358 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 428                           hypothetical protein [Eccles virus]

Eccles virus was reported by Medd et al. 2018 in D. suzukii where 6 segments were detected.

La Jolla virus in D. melanogaster, D. suzukii and D. sub/obscura

Define the corresponding contigs :

# Contigs attributed to this virus; no contig is left unassigned (NA).
contig_set <- c(
  "contig_4993",
  "contig_5731",
  "contig_22835",
  "contig_20871"
)
contig_set_unassigned <- NA
# Keep both sets in virus_list under this virus' key, for the later fusion of
# the corresponding lines.
virus_list$LaJolla_virus <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Call plot_orfs on these contigs with the gff_wta2 annotations; res[1] and
# res[2] are displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "LaJolla_virus"
)
##                    orf_name        seqid    source type start  end score strand
## 265  contig_20871_140_337_- contig_20871 getorf_JV gene   140  337     .  FALSE
## 266 contig_20871_324_1043_+ contig_20871 getorf_JV gene   324 1043     .   TRUE
## 303   contig_22835_1_4500_- contig_22835 getorf_JV gene     1 4500     .  FALSE
## 343  contig_4993_881_2755_+  contig_4993 getorf_JV gene   881 2755     .   TRUE
## 354   contig_5731_64_1650_+  contig_5731 getorf_JV gene    64 1650     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 265     1        198       1043       <NA>       NA               NA         NA
## 266     1        720       1043 AWY11061.1    1.000              240          0
## 303     1       4500       4520 AKH40286.1    0.980             1500         30
## 343     1       1875       2757 AKH40286.1    0.985              624          9
## 354     1       1587       1652 AKH40286.1    0.994              529          3
##     gap_opens qstart qend sstart send     evalue bitscore
## 265        NA     NA   NA     NA   NA         NA       NA
## 266         0      1  240   2067 2306 2.273e-170      529
## 303         0      1 1500    635 2134  0.000e+00     3028
## 343         0      1  624      1  624  0.000e+00     1263
## 354         0      1  529   2330 2858  0.000e+00     1106
##                                         annotation
## 265                                           <NA>
## 266          putative polyprotein [La Jolla virus]
## 303 putative polyprotein, partial [La Jolla virus]
## 343 putative polyprotein, partial [La Jolla virus]
## 354 putative polyprotein, partial [La Jolla virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 265  contig_20871_140_337_- contig_20871 getorf_JV gene   140  337     .  FALSE
## 266 contig_20871_324_1043_+ contig_20871 getorf_JV gene   324 1043     .   TRUE
## 303   contig_22835_1_4500_- contig_22835 getorf_JV gene     1 4500     .  FALSE
## 343  contig_4993_881_2755_+  contig_4993 getorf_JV gene   881 2755     .   TRUE
## 354   contig_5731_64_1650_+  contig_5731 getorf_JV gene    64 1650     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 265     1        198       1043       <NA>       NA               NA         NA
## 266     1        720       1043 AWY11061.1    1.000              240          0
## 303     1       4500       4520 AKH40286.1    0.980             1500         30
## 343     1       1875       2757 AKH40286.1    0.985              624          9
## 354     1       1587       1652 AKH40286.1    0.994              529          3
##     gap_opens qstart qend sstart send     evalue bitscore
## 265        NA     NA   NA     NA   NA         NA       NA
## 266         0      1  240   2067 2306 2.273e-170      529
## 303         0      1 1500    635 2134  0.000e+00     3028
## 343         0      1  624      1  624  0.000e+00     1263
## 354         0      1  529   2330 2858  0.000e+00     1106
##                                         annotation
## 265                                           <NA>
## 266          putative polyprotein [La Jolla virus]
## 303 putative polyprotein, partial [La Jolla virus]
## 343 putative polyprotein, partial [La Jolla virus]
## 354 putative polyprotein, partial [La Jolla virus]

La Jolla virus was reported in Webster et al. 2016 and Medd et al. It is very frequent in D. melanogaster, also found in D. simulans and D. suzukii.

Craigies Hill virus in D. melanogaster

Define the corresponding contigs :

# Contigs attributed to this virus; no contig is left unassigned (NA).
contig_set <- c("contig_17257", "contig_19996")
contig_set_unassigned <- NA
# Keep both sets in virus_list under this virus' key, for the later fusion of
# the corresponding lines.
virus_list$CraigiesHill_virus_Dmel <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Call plot_orfs on these contigs with the gff_wta2 annotations; res[1] and
# res[2] are displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "CraigiesHill_virus_Dmel"
)
##                    orf_name        seqid    source type start  end score strand
## 185  contig_17257_75_1004_+ contig_17257 getorf_JV gene    75 1004     .   TRUE
## 243 contig_19996_246_1067_+ contig_19996 getorf_JV gene   246 1067     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 185     1        930       1167 AWY11116.1        1              310          0
## 243     1        822       1069 AKH67449.1        1              274          0
##     gap_opens qstart qend sstart send     evalue bitscore
## 185         0      1  310      1  310 1.117e-207      641
## 243         0      1  274    463  736 4.341e-183      568
##                                              annotation
## 185                          orf1 [Craigies Hill virus]
## 243 hypothetical protein, partial [Craigies Hill virus]
## Saving 7 x 5 in image
res[1]
## [[1]]

res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 185  contig_17257_75_1004_+ contig_17257 getorf_JV gene    75 1004     .   TRUE
## 243 contig_19996_246_1067_+ contig_19996 getorf_JV gene   246 1067     .   TRUE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 185     1        930       1167 AWY11116.1        1              310          0
## 243     1        822       1069 AKH67449.1        1              274          0
##     gap_opens qstart qend sstart send     evalue bitscore
## 185         0      1  310      1  310 1.117e-207      641
## 243         0      1  274    463  736 4.341e-183      568
##                                              annotation
## 185                          orf1 [Craigies Hill virus]
## 243 hypothetical protein, partial [Craigies Hill virus]

CraigiesHill_virus was reported in Webster et al. 2016 in D. melanogaster.

Muthill virus mostly in D. immigrans (~99%) 1 contig 10490bp

Define the corresponding contigs :

# Single contig attributed to this virus; no contig is left unassigned (NA).
contig_set <- "contig_1269"
contig_set_unassigned <- NA
# Keep both sets in virus_list under this virus' key, for the later fusion of
# the corresponding lines.
virus_list$Muthill_virus_D.im <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Call plot_orfs on this contig with the gff_wta2 annotations; res[1] and
# res[2] are displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Muthill_virus_D.im"
)
##                    orf_name       seqid    source type start   end score strand
## 79    contig_1269_106_456_- contig_1269 getorf_JV gene   106   456     .  FALSE
## 80  contig_1269_1060_1761_- contig_1269 getorf_JV gene  1060  1761     .  FALSE
## 81  contig_1269_1800_3650_- contig_1269 getorf_JV gene  1800  3650     .  FALSE
## 82  contig_1269_3699_7634_- contig_1269 getorf_JV gene  3699  7634     .  FALSE
## 83   contig_1269_486_1019_- contig_1269 getorf_JV gene   486  1019     .  FALSE
## 84 contig_1269_7688_10429_- contig_1269 getorf_JV gene  7688 10429     .  FALSE
##    phase attributes seq_length subject_id identity alignment_length mismatches
## 79     1        351      10490 AVZ66287.1    0.614              114         44
## 80     1        702      10490 AVZ66285.1    0.723              228         63
## 81     1       1851      10490 AMO03224.1    1.000              617          0
## 82     1       3936      10490 AMO03223.1    0.996             1312          5
## 83     1        534      10490 AVZ66286.1    0.764              178         42
## 84     1       2742      10490 AMO03223.1    0.988              796         10
##    gap_opens qstart qend sstart send     evalue bitscore
## 79         0      1  114     21  134  4.967e-38      141
## 80         0      5  232      3  230 2.271e-107      347
## 81         0      1  617      1  617  0.000e+00     1250
## 82         0      1 1312    933 2244  0.000e+00     2709
## 83         0      1  178      1  178  1.457e-83      276
## 84         0      1  796      1  796  0.000e+00     1620
##                               annotation
## 79 hypothetical protein [Brandeis virus]
## 80 hypothetical protein [Brandeis virus]
## 81  hypothetical protein [Muthill virus]
## 82           polyprotein [Muthill virus]
## 83 hypothetical protein [Brandeis virus]
## 84           polyprotein [Muthill virus]
## Saving 7 x 5 in image
# Echo the first element of `res`; in this text rendering only the list header
# follows (presumably the element is a figure - TODO confirm against plot_orfs).
res[1]
## [[1]]

# Echo the second element of `res`: the ORF table with hit/annotation columns.
res[2]
## [[1]]
##                    orf_name       seqid    source type start   end score strand
## 79    contig_1269_106_456_- contig_1269 getorf_JV gene   106   456     .  FALSE
## 80  contig_1269_1060_1761_- contig_1269 getorf_JV gene  1060  1761     .  FALSE
## 81  contig_1269_1800_3650_- contig_1269 getorf_JV gene  1800  3650     .  FALSE
## 82  contig_1269_3699_7634_- contig_1269 getorf_JV gene  3699  7634     .  FALSE
## 83   contig_1269_486_1019_- contig_1269 getorf_JV gene   486  1019     .  FALSE
## 84 contig_1269_7688_10429_- contig_1269 getorf_JV gene  7688 10429     .  FALSE
##    phase attributes seq_length subject_id identity alignment_length mismatches
## 79     1        351      10490 AVZ66287.1    0.614              114         44
## 80     1        702      10490 AVZ66285.1    0.723              228         63
## 81     1       1851      10490 AMO03224.1    1.000              617          0
## 82     1       3936      10490 AMO03223.1    0.996             1312          5
## 83     1        534      10490 AVZ66286.1    0.764              178         42
## 84     1       2742      10490 AMO03223.1    0.988              796         10
##    gap_opens qstart qend sstart send     evalue bitscore
## 79         0      1  114     21  134  4.967e-38      141
## 80         0      5  232      3  230 2.271e-107      347
## 81         0      1  617      1  617  0.000e+00     1250
## 82         0      1 1312    933 2244  0.000e+00     2709
## 83         0      1  178      1  178  1.457e-83      276
## 84         0      1  796      1  796  0.000e+00     1620
##                               annotation
## 79 hypothetical protein [Brandeis virus]
## 80 hypothetical protein [Brandeis virus]
## 81  hypothetical protein [Muthill virus]
## 82           polyprotein [Muthill virus]
## 83 hypothetical protein [Brandeis virus]
## 84           polyprotein [Muthill virus]

Muthill virus was reported in Webster et al. 2016 and found in D. immigrans.

Chaq virus mostly in D. simulans

Define the corresponding contigs :

# Chaq virus: single contig.
# NOTE(review): this contig is not added to virus_list in this section -
# confirm it is registered elsewhere, otherwise the final completeness check
# may list it as not analyzed.
contig_set <- "contig_7817"

For the phylogeny, see paragraph 2.3 (Chaq virus in D. mel).

Drosophila_immigrans_Nora_virus in D. immigrans (99.4% identity)

Define the corresponding contigs :

# Drosophila immigrans Nora virus: two contigs with homolog hits, none without
contig_set <- c("contig_1582", "contig_22830")
contig_set_unassigned <- NA
# Register both sets under the virus name; virus_list is flattened into the
# summary table further down.
virus_list[["Noravirus_D.im"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Noravirus_D.im"
)
##                     orf_name        seqid    source type start  end score
## 148     contig_1582_3_4766_-  contig_1582 getorf_JV gene     3 4766     .
## 149  contig_1582_4772_6226_-  contig_1582 getorf_JV gene  4772 6226     .
## 300 contig_22830_3250_4134_- contig_22830 getorf_JV gene  3250 4134     .
## 301  contig_22830_410_3184_- contig_22830 getorf_JV gene   410 3184     .
## 302 contig_22830_4112_4390_- contig_22830 getorf_JV gene  4112 4390     .
##     strand phase attributes seq_length subject_id identity alignment_length
## 148  FALSE     1       4764       6778 ABC55268.2    0.575             1648
## 149  FALSE     1       1455       6778 AHZ92153.1    0.995              485
## 300  FALSE     1        885       4474 AHZ92154.1    0.993              295
## 301  FALSE     1       2775       4474 ABC55270.1    0.810              925
## 302  FALSE     1        279       4474 AHZ92156.1    1.000               93
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 148        674         0      1 1588      1 1648  0.000e+00     1875
## 149          2         0      1  485      1  485 2.590e-310      947
## 300          2         0      1  295      1  295 5.381e-175      546
## 301        174         0      1  925      1  918  0.000e+00     1532
## 302          0         0      1   93   1951 2043  1.120e-59      202
##                                annotation
## 148  replication polyprotein [Nora virus]
## 149 VP1 [Drosophila immigrans Nora virus]
## 300 VP3 [Drosophila immigrans Nora virus]
## 301                    ORF 4 [Nora virus]
## 302 VP2 [Drosophila immigrans Nora virus]
## Saving 7 x 5 in image
# Echo the first element of `res`; in this text rendering only the list header
# follows (presumably the element is a figure - TODO confirm against plot_orfs).
res[1]
## [[1]]

# Echo the second element of `res`: the ORF table with hit/annotation columns.
res[2]
## [[1]]
##                     orf_name        seqid    source type start  end score
## 148     contig_1582_3_4766_-  contig_1582 getorf_JV gene     3 4766     .
## 149  contig_1582_4772_6226_-  contig_1582 getorf_JV gene  4772 6226     .
## 300 contig_22830_3250_4134_- contig_22830 getorf_JV gene  3250 4134     .
## 301  contig_22830_410_3184_- contig_22830 getorf_JV gene   410 3184     .
## 302 contig_22830_4112_4390_- contig_22830 getorf_JV gene  4112 4390     .
##     strand phase attributes seq_length subject_id identity alignment_length
## 148  FALSE     1       4764       6778 ABC55268.2    0.575             1648
## 149  FALSE     1       1455       6778 AHZ92153.1    0.995              485
## 300  FALSE     1        885       4474 AHZ92154.1    0.993              295
## 301  FALSE     1       2775       4474 ABC55270.1    0.810              925
## 302  FALSE     1        279       4474 AHZ92156.1    1.000               93
##     mismatches gap_opens qstart qend sstart send     evalue bitscore
## 148        674         0      1 1588      1 1648  0.000e+00     1875
## 149          2         0      1  485      1  485 2.590e-310      947
## 300          2         0      1  295      1  295 5.381e-175      546
## 301        174         0      1  925      1  918  0.000e+00     1532
## 302          0         0      1   93   1951 2043  1.120e-59      202
##                                annotation
## 148  replication polyprotein [Nora virus]
## 149 VP1 [Drosophila immigrans Nora virus]
## 300 VP3 [Drosophila immigrans Nora virus]
## 301                    ORF 4 [Nora virus]
## 302 VP2 [Drosophila immigrans Nora virus]

Bloomfield_virus in D. melanogaster (identity 99%)

Define the corresponding contigs :

# Bloomfield virus: three contigs with homolog hits, none without
contig_set <- c("contig_13750", "contig_6964", "contig_17142")
contig_set_unassigned <- NA
# Register both sets under the virus name; virus_list is flattened into the
# summary table further down.
virus_list[["Bloomfield_virus_D.mel"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Bloomfield_virus_D.mel"
)
##                    orf_name        seqid    source type start  end score strand
## 119 contig_13750_119_1306_- contig_13750 getorf_JV gene   119 1306     .  FALSE
## 184 contig_17142_383_1171_+ contig_17142 getorf_JV gene   383 1171     .   TRUE
## 376    contig_6964_1_2091_-  contig_6964 getorf_JV gene     1 2091     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 119     1       1188       1344 AKH40315.1    0.997              396          1
## 184     1        789       1171 AKH40312.1    1.000              263          0
## 376     1       2091       2132 AKH40311.1    0.988              696          8
##     gap_opens qstart qend sstart send     evalue bitscore
## 119         0      1  396    151  546 1.133e-247      762
## 184         0      1  263    688  950 2.625e-170      531
## 376         0      1  696    509 1204  0.000e+00     1385
##                                         annotation
## 119                        ORF1 [Bloomfield virus]
## 184                        ORF1 [Bloomfield virus]
## 376 putative major core protein [Bloomfield virus]
## Saving 7 x 5 in image
# Echo the first element of `res`; in this text rendering only the list header
# follows (presumably the element is a figure - TODO confirm against plot_orfs).
res[1]
## [[1]]

# Echo the second element of `res`: the ORF table with hit/annotation columns.
res[2]
## [[1]]
##                    orf_name        seqid    source type start  end score strand
## 119 contig_13750_119_1306_- contig_13750 getorf_JV gene   119 1306     .  FALSE
## 184 contig_17142_383_1171_+ contig_17142 getorf_JV gene   383 1171     .   TRUE
## 376    contig_6964_1_2091_-  contig_6964 getorf_JV gene     1 2091     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 119     1       1188       1344 AKH40315.1    0.997              396          1
## 184     1        789       1171 AKH40312.1    1.000              263          0
## 376     1       2091       2132 AKH40311.1    0.988              696          8
##     gap_opens qstart qend sstart send     evalue bitscore
## 119         0      1  396    151  546 1.133e-247      762
## 184         0      1  263    688  950 2.625e-170      531
## 376         0      1  696    509 1204  0.000e+00     1385
##                                         annotation
## 119                        ORF1 [Bloomfield virus]
## 184                        ORF1 [Bloomfield virus]
## 376 putative major core protein [Bloomfield virus]

Prestney_Burn_D.sub|obs in D. subobscura (100%)

Define the corresponding contigs :

# Prestney Burn virus: one contig with homolog hits, none without
contig_set <- "contig_5504"
contig_set_unassigned <- NA
# Register both sets under the virus name; virus_list is flattened into the
# summary table further down.
virus_list[["Prestney_Burn_D.sub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Prestney_Burn_D.sub|obs"
)
##                   orf_name       seqid    source type start  end score strand
## 348    contig_5504_1_468_- contig_5504 getorf_JV gene     1  468     .  FALSE
## 349 contig_5504_528_1862_- contig_5504 getorf_JV gene   528 1862     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 348     1        468       1912 AWY11067.1    0.935              156         10
## 349     1       1335       1912 AMO03210.1    0.993              445          3
##     gap_opens qstart qend sstart send     evalue bitscore
## 348         0      1  156      1  156  4.938e-97      313
## 349         0      1  445    134  578 2.114e-285      873
##                                                   annotation
## 348 putative RNA-dependent RNA polymerase [Motts Mill virus]
## 349             hypothetical protein 1 [Prestney Burn virus]
## Saving 7 x 5 in image
# Echo the first element of `res`; in this text rendering only the list header
# follows (presumably the element is a figure - TODO confirm against plot_orfs).
res[1]
## [[1]]

# Echo the second element of `res`: the ORF table with hit/annotation columns.
res[2]
## [[1]]
##                   orf_name       seqid    source type start  end score strand
## 348    contig_5504_1_468_- contig_5504 getorf_JV gene     1  468     .  FALSE
## 349 contig_5504_528_1862_- contig_5504 getorf_JV gene   528 1862     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 348     1        468       1912 AWY11067.1    0.935              156         10
## 349     1       1335       1912 AMO03210.1    0.993              445          3
##     gap_opens qstart qend sstart send     evalue bitscore
## 348         0      1  156      1  156  4.938e-97      313
## 349         0      1  445    134  578 2.114e-285      873
##                                                   annotation
## 348 putative RNA-dependent RNA polymerase [Motts Mill virus]
## 349             hypothetical protein 1 [Prestney Burn virus]

Motts_Mill_virus_D.sub_virus in D. subobscura (85%)

Define the corresponding contigs :

# Motts Mill virus: one contig with homolog hits, none without
contig_set <- "contig_12896"
contig_set_unassigned <- NA
# Register both sets under the virus name; virus_list is flattened into the
# summary table further down.
virus_list[["Motts_Mill_D.sub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Motts_Mill_D.sub|obs"
)
##                   orf_name        seqid    source type start  end score strand
## 86   contig_12896_10_615_+ contig_12896 getorf_JV gene    10  615     .   TRUE
## 87 contig_12896_739_1119_+ contig_12896 getorf_JV gene   739 1119     .   TRUE
##    phase attributes seq_length subject_id identity alignment_length mismatches
## 86     1        606       1121 AWY11140.1    0.851              202         30
## 87     1        381       1121 AKH40293.1    0.488              125         63
##    gap_opens qstart qend sstart send     evalue bitscore
## 86         0      1  202      1  202 9.484e-111      355
## 87         0      2  126     14  138  1.168e-31      123
##                          annotation
## 86          orf1 [Motts Mill virus]
## 87 orf2, partial [Motts Mill virus]
## Saving 7 x 5 in image
# Echo the first element of `res`; in this text rendering only the list header
# follows (presumably the element is a figure - TODO confirm against plot_orfs).
res[1]
## [[1]]

# Echo the second element of `res`: the ORF table with hit/annotation columns.
res[2]
## [[1]]
##                   orf_name        seqid    source type start  end score strand
## 86   contig_12896_10_615_+ contig_12896 getorf_JV gene    10  615     .   TRUE
## 87 contig_12896_739_1119_+ contig_12896 getorf_JV gene   739 1119     .   TRUE
##    phase attributes seq_length subject_id identity alignment_length mismatches
## 86     1        606       1121 AWY11140.1    0.851              202         30
## 87     1        381       1121 AKH40293.1    0.488              125         63
##    gap_opens qstart qend sstart send     evalue bitscore
## 86         0      1  202      1  202 9.484e-111      355
## 87         0      2  126     14  138  1.168e-31      123
##                          annotation
## 86          orf1 [Motts Mill virus]
## 87 orf2, partial [Motts Mill virus]

Pure Dark matter

Dark matter 3 mostly in Leptopilina…

Define the corresponding contigs :

# Dark matter: no contig with a homolog hit, one contig without any hit
contig_set <- NA
contig_set_unassigned <- paste0("contig_", 20830)
# Register both sets under the virus name; virus_list is flattened into the
# summary table further down.
# NOTE(review): the section heading reads "Dark matter 3" but the key used here
# is "Dark1" - confirm this does not overwrite an earlier "Dark1" entry.
virus_list[["Dark1"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; its result is kept in `res`
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Dark1"
)
##                   orf_name        seqid    source type start  end score strand
## 263 contig_20830_68_1042_- contig_20830 getorf_JV gene    68 1042     .  FALSE
## 264  contig_20830_68_778_- contig_20830 getorf_JV gene    68  778     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 263     2        975       1044       <NA>       NA               NA         NA
## 264     1        711       1044       <NA>       NA               NA         NA
##     gap_opens qstart qend sstart send evalue bitscore annotation
## 263        NA     NA   NA     NA   NA     NA       NA       <NA>
## 264        NA     NA   NA     NA   NA     NA       NA       <NA>
## Saving 7 x 5 in image
# Echo the first element of `res`; in this text rendering only the list header
# follows (presumably the element is a figure - TODO confirm against plot_orfs).
res[1]
## [[1]]

# Echo the second element of `res`: the ORF table (hit columns are NA here).
res[2]
## [[1]]
##                   orf_name        seqid    source type start  end score strand
## 263 contig_20830_68_1042_- contig_20830 getorf_JV gene    68 1042     .  FALSE
## 264  contig_20830_68_778_- contig_20830 getorf_JV gene    68  778     .  FALSE
##     phase attributes seq_length subject_id identity alignment_length mismatches
## 263     2        975       1044       <NA>       NA               NA         NA
## 264     1        711       1044       <NA>       NA               NA         NA
##     gap_opens qstart qend sstart send evalue bitscore annotation
## 263        NA     NA   NA     NA   NA     NA       NA       <NA>
## 264        NA     NA   NA     NA   NA     NA       NA       <NA>

contig_20830 => best hit: MAG: hypothetical protein [Diaphorina citri cimodo-like virus]; sequence ID: QXG83187.1; length: 636; e-value: 0.006

Create a summary table

# Preview the first six registered viruses with their two contig sets
head(virus_list, n = 6L)
## $Parvoviridae_Pachy
## $Parvoviridae_Pachy$contig_set
## [1] "contig_2320"
## 
## $Parvoviridae_Pachy$contig_set_unassigned
## [1] NA
## 
## 
## $Vesantovirus_D.sub
## $Vesantovirus_D.sub$contig_set
## [1] "contig_2799"  "contig_14992" "contig_2780"  "contig_2857"  "contig_22871"
## [6] "contig_2659"  "contig_8503"  "contig_15585"
## 
## $Vesantovirus_D.sub$contig_set_unassigned
## [1] "contig_7654"  "contig_17519"
## 
## 
## $Parvoviridae2
## $Parvoviridae2$contig_set
## [1] "contig_15192"
## 
## $Parvoviridae2$contig_set_unassigned
## [1] NA
## 
## 
## $Linvill_road_virus_D.sim
## $Linvill_road_virus_D.sim$contig_set
## [1] "contig_627" "contig_626"
## 
## $Linvill_road_virus_D.sim$contig_set_unassigned
## [1] NA
## 
## 
## $LbFV_L.b
## $LbFV_L.b$contig_set
##  [1] "contig_1505"  "contig_22345" "contig_1350"  "contig_22895" "contig_22365"
##  [6] "contig_22449" "contig_22533" "contig_19307" "contig_12283" "contig_22381"
## 
## $LbFV_L.b$contig_set_unassigned
## [1] NA
## 
## 
## $LhFV_L.h
## $LhFV_L.h$contig_set
##  [1] "contig_9355"  "contig_21206" "contig_19696" "contig_3127"  "contig_356"  
##  [6] "contig_682"   "contig_22485" "contig_223"   "contig_701"   "contig_22588"
## [11] "contig_2709"  "contig_19153"
## 
## $LhFV_L.h$contig_set_unassigned
## [1] NA
# Build one long-format table listing every contig attributed to each virus.
# For each entry of `virus_list`, contigs from `contig_set` get
# homologs = TRUE and contigs from `contig_set_unassigned` get
# homologs = FALSE. NA placeholders (used to mean "no contig") are dropped.
# The virus label carries the total number of contigs ("<name> n=<count>").
list_res <- vector("list", length(virus_list))
# seq_along() instead of 1:length(): with an empty virus_list the loop body is
# skipped rather than iterating over c(1, 0) and failing on virus_list[[1]].
for (i in seq_along(virus_list)) {
  entry <- virus_list[[i]]
  assigned <- entry[["contig_set"]]
  assigned <- assigned[!is.na(assigned)]
  unassigned <- entry[["contig_set_unassigned"]]
  unassigned <- unassigned[!is.na(unassigned)]
  n_assigned <- length(assigned)
  n_unassigned <- length(unassigned)
  n_total <- n_assigned + n_unassigned

  list_res[[i]] <- data.frame(
    virus_name = rep(paste0(names(virus_list)[i], " n=", n_total), n_total),
    contig_name = c(assigned, unassigned),
    homologs = rep(c(TRUE, FALSE), times = c(n_assigned, n_unassigned))
  )
}

# Stack the per-virus data frames and export them as a tab-separated file
summary_table <- do.call(what = rbind.data.frame, args = list_res)
write.table(
  summary_table,
  file = "../TABLES/summary_table.txt",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE,
  sep = "\t"
)
head(summary_table)

Check that all contigs have been analyzed.

# Completeness check: list every contig present in the WGA / WTA coverage
# tables that does not appear in summary_table.
wga_table2 <- read.table(
  "../TABLES/wga_cov2_viruses_plus_unassigned_clean2.tab.txt",
  header = TRUE,
  sep = "\t"
)
wta_table2 <- read.table(
  file = "../TABLES/wta_cov2_viruses_plus_unassigned2.tab.txt",
  header = TRUE,
  sep = "\t"
)

# Return the contig identifiers of a coverage table. The original code took
# them from rownames() but filtered on the `contig_name` column; with
# read.table(header = TRUE) the row names are just "1", "2", ... unless the
# header is one field short, so the two could disagree and the check would
# silently report nothing. Prefer the column, fall back to the row names.
contig_ids <- function(tab) {
  if ("contig_name" %in% colnames(tab)) {
    as.character(tab[["contig_name"]])
  } else {
    rownames(tab)
  }
}

wga_contigs <- contig_ids(wga_table2)
wta_contigs <- contig_ids(wta_table2)

all_contigs <- c(wga_contigs, wta_contigs)
not_analyzed <- setdiff(all_contigs, summary_table$contig_name)
# Rows are selected with the same identifiers used to build `not_analyzed`
wga_table2[wga_contigs %in% not_analyzed, ]
wta_table2[wta_contigs %in% not_analyzed, ]